Abstract
“Human peripheral blood mononuclear cells PBMCs of a healthy female donor aged 25 were obtained by 10x Genomics from AllCells Whole transcriptome libraries were generated with Chromium Next GEM Single Cell 3’ Reagent Kits v3 1 Dual Index User Guide CG000315 and sequenced on an Illumina NovaSeq 6000”###############################################################################
## Load packages ##
library(tidyverse)
library(dplyr)
library(Seurat)
library(ggplot2)
library(tidyr)
library(knitr)
## Set documentation parameters ##
VersionPdfExt <- paste0(".V", gsub("-", "", Sys.Date()), ".pdf")
if (dir.exists("/Volumes/babs/working/boeings/")){
hpc.mount <- "/Volumes/babs/working/boeings/"
} else if (dir.exists("Y:/working/boeings/")){
hpc.mount <- "Y:/working/boeings/"
} else if (dir.exists("/camp/stp/babs/working/boeings/")){
hpc.mount <- "/camp/stp/babs/working/boeings/"
} else {
hpc.mount <- ""
}
## Load custom packages specific for this analysis ##
source("assets/scTools.r")
source("assets/SBwebtools.pckg.r")
## Set database parameters ##
FN <- paste0(hpc.mount, "Projects/reference_data/documentation/BC.parameters.txt")
dbTable <- read.delim(
FN,
sep = "\t",
stringsAsFactors = F
)
db.pwd <- as.vector(dbTable[1,1])
## Set filename for temp pdf files ##
if (length(.libPaths()) > 2){
.libPaths(.libPaths()[2:3])
}
###############################################################################
## Load Obio object from step A ##
ObioFN <- paste0(
"../",
list.files("..")[grep(".bioLOGIC.Robj", list.files(".."))]
)
load(ObioFN)
## Activate debug mode on subset of cells < 1
Obio@parameterList$debugReduce <- 0.1
## Set project specific parameters ##
Obio <- setMountingPoint(Obio)
Obio <- setAnalysisPaths(Obio)
Obio <- setCrickGenomeAndGeneNameTable(Obio)
Obio <- createAnalysisFolders(
Obio,
baseDir="/camp/stp/babs/working/boeings/Projects/",
localBaseDir = paste0(hpc.mount, "Projects/")
)
Obio <- setDataBaseParameters(Obio)
Obio <- addGeneAnnotation(Obio)
## Create Sample List ##
if (Obio@parameterList$hpcMount != "/camp/stp/babs/working/boeings/"){
for (i in 1:length(Obio@sampleDetailList)){
Obio@sampleDetailList[[i]]$path <- gsub("/camp/stp/babs/working/boeings/",Obio@parameterList$hpcMount, Obio@sampleDetailList[[i]]$path)
}
}
## Create outputfolders ##
if (!dir.exists(paste0(Obio@parameterList$localWorkDir,Obio@parameterList$project_id))){
dir.create(paste0(Obio@parameterList$localWorkDir,Obio@parameterList$project_id))
}
Obio@parameterList[["html_local"]] <- paste0(Obio@parameterList$localWorkDir, "html_local/")
if (!dir.exists(Obio@parameterList[["html_local"]])){
dir.create(Obio@parameterList[["html_local"]])
}
Obio@parameterList[["reportFigDir"]] <- paste0(Obio@parameterList$html_local, "report_figures/")
pdfTemp <- paste0(Obio@parameterList$reportFigDir, "temp.pdf")
if (!dir.exists(Obio@parameterList$reportFigDir)){
dir.create(Obio@parameterList$reportFigDir)
}
figureCount <- 1
## Load R module load R/3.5.1-foss-2018b ##
#setwd(Obio@parameterList$localWorkDir)
if (Obio@parameterList$host == "10.27.241.234"){
urlString <- "biologic.thecrick.org"
} else {
urlString <- "biologic.crick.ac.uk"
}
## Debugging mode on subset of cells ##
# Obio@parameterList$d
###############################################################################
## Set gene reference list ##
Obio@dataTableList[["referenceList"]] <- list()
if (is.null(Obio@parameterList$HmDisplayCatsFromDb)){
Obio@parameterList$HmDisplayCatsFromDb <- list(
"TFs" = "ag_lab_categories__10"
)
}
## Add transcription factors that happen to be cluster markers ##
catList <- Obio@parameterList$HmDisplayCatsFromDb
if (Obio@parameterList$geneIDcolumn != "mgi_symbol"
& Obio@parameterList$geneIDcolumn != "hgnc_symbol") {
queryGS <- "hgnc_symbol"
} else {
queryGS <- Obio@parameterList$geneIDcolumn
}
for (i in 1:length(catList)){
tempVec <- retrieve.gene.category.from.db(
cat_id = catList[[i]],
password = db.pwd,
gene.symbol = queryGS,
user = Obio@parameterList$db.user,
host = Obio@parameterList$host
)
Obio@dataTableList$referenceList[[names(catList)[i]]] <- tempVec
}
## Done ##
###############################################################################
## Load Seurat Object cretated in part A
###############################################################################
## Create sample list filtered on MT and norm_counts_RNA ##
SampleList <- createNormSampleList(
obj = Obio,
reduce = Obio@parameterList$debugReduce, # Default is NULL
vars.to.regress = Obio@parameterList$vars.to.regress,
s.genes = NULL,
g2m.genes = NULL
)
print(paste0("Cell Recuction: ", Obio@parameterList$debugReduce))
lapply(SampleList, dim)
## Done ##
###############################################################################
###############################################################################
## Add doublet annotation, if present, to meta data ##
pos <- grep("DF_resultlist", names(Obio@dataTableList))
if (length(pos) > 0){
sampleNames <- names(SampleList)
for (i in 1:length(SampleList)){
dfAdd <- Obio@dataTableList[["DF_resultlist"]][[sampleNames[i]]]
row.names(dfAdd) <- gsub("-1", "",row.names(dfAdd))
dfAdd <- dfAdd[row.names(dfAdd) %in% row.names(SampleList[[i]]@meta.data),]
SampleList[[i]] <- addDf2seuratMetaData(
obj = SampleList[[i]],
dfAdd = dfAdd
)
}
}
## Done ##
###############################################################################
###############################################################################
## Integrate Datasets ##
if (length(SampleList) > 1){
if (Obio@parameterList$scIntegrationMethod == "SCT"){
if (length(grep("scNintegrationFeatures", names(Obio@parameterList))) == 0){
Obio@parameterList$scNintegrationFeatures = 3000
}
library(future)
options(future.globals.maxSize = 14000 * 1024^2)
plan("multiprocess", workers = 30)
sample.features <- SelectIntegrationFeatures(
object.list = SampleList,
nfeatures = Obio@parameterList$scNintegrationFeatures
)
SampleList <- PrepSCTIntegration(
object.list = SampleList,
anchor.features = sample.features,
verbose = FALSE
)
sampleAnchors <- FindIntegrationAnchors(
object.list = SampleList,
normalization.method = "SCT",
anchor.features = sample.features,
verbose = FALSE
)
OsC <- IntegrateData(
anchorset = sampleAnchors,
normalization.method = "SCT",
verbose = FALSE
)
detach("package:future", unload=TRUE)
} else {
sampleAnchors <- FindIntegrationAnchors(
object.list = SampleList,
dims = 1:30
)
OsC <- IntegrateData(
#features.to.integrate = geneIntersectVec,
anchorset = sampleAnchors,
dims = 1:30
)
}
Obio@dataTableList$referenceList[["sampleAnchors"]] <- as.vector(sort(sampleAnchors@anchor.features))
} else {
OsC <- SampleList[[1]]
}
Idents(OsC) <- "sampleID"
Idents(OsC) <- factor(Idents(OsC), levels = names(Obio@sampleDetailList))
#OsC@meta.data$sampleID <- factor(OsC@meta.data$sampleID, levels = names(Obio@sampleDetailList))
OsC@meta.data[["cellID"]] <- row.names(OsC@meta.data)
save(OsC,
file = paste0(
Obio@parameterList$localWorkDir,
Obio@parameterList$project_id,
".Seurat.Robj"
)
)
print("R bioLOGIC single cell object initialized.")
###############################################################################
## Setting plotting parameters ##
dotsize = 1
if (nrow(OsC@meta.data) > 10000){
dotsize = 0.75
} else if (nrow(OsC@meta.data) > 20000){
dotsize = 0.5
} else if (nrow(OsC@meta.data) > 50000){
dotsize = 0.25
}
legendDotSize <- 5
dendrofactor <- 5
## Setting plotting parameters ##
###############################################################################
plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")
## First make variation plot for integrated samples, than for all individual samples separately
tag <- "Integrated_Samples"
DefaultAssay(OsC) <- "RNA"
OsC <- FindVariableFeatures(
object = OsC,
selection.method = 'vst',
nfeatures = Obio@parameterList$NtopGenes
)
# Identify the 10 most highly variable genes
label2000 <- paste0("integrated", "_", "top2000var")
Obio@dataTableList$referenceList[[ label2000]]<- head(
x = VariableFeatures(object = OsC),
2000
)
label30 <- paste0("integrated", "_", "top30var")
Obio@dataTableList$referenceList[[ label30]]<- head(
x = VariableFeatures(object = OsC),
30
)
## slot for variable features OsC@assays$RNA@var.features
dfVar <- OsC@assays$RNA@meta.features
names(dfVar) <- gsub("vst.", "",names(dfVar))
dfVar[["gene"]] <- row.names(dfVar)
OsC@meta.data[["all"]] <- "all"
Idents(OsC) <- "all"
cluster.averages <- AverageExpression(
OsC,
return.seurat = TRUE
)
Idents(OsC) <- "sampleID"
dfAvgExpr <- data.frame(cluster.averages[["RNA"]]@data)
dfAvgExpr[["gene"]] <- row.names(dfAvgExpr)
names(dfAvgExpr)[1] <- "Avg.Expression"
dfVar <- merge(
dfVar,
dfAvgExpr,
by.x = "gene",
by.y = "gene"
)
dfVar[["Type"]] <- "Standard"
dfVar[dfVar$gene %in% OsC@assays$RNA@var.features, "Type"] <- "Most Variable"
dfVar[["text"]] <- ""
dfVar[dfVar$gene %in% as.vector(Obio@dataTableList$referenceList[[label30]]), "text"] <- dfVar[dfVar$gene %in% as.vector(Obio@dataTableList$referenceList[[label30]]), "gene"]
#dotsize <- 0.5
library(ggrepel)
plotList[[tag]] <- ggplot(
data = dfVar,
aes(
x=Avg.Expression,
y=variance.standardized, label = text, color = Type
)
) + geom_point( shape=16, size = dotsize
) + xlab("Average Expression") + ylab("Variance Standarized"
) + theme_bw(
) + theme(
axis.text.y = element_text(size=8),
axis.text.x = element_text(size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12)
) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
) + ggtitle(paste0("Variance vs. Expression in the Overall Experiment")
) + scale_color_manual(values=c("#FF0000", "#000000")
) + geom_text_repel(
)
###########################################################################
## Save plot to file ##
FNbase <- paste0("variation.integrated.samples", VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
## ##
###########################################################################
link <- paste0('<a href="https://',urlString,'/',Obio@parameterList$project_id,'/scatterplot?x_axis=',paste0(tag, '_AvgExpr'),'&y_axis=',paste0(tag, '_var_std" target = "_blank">here</a>'))
figCap <- paste0(
'**Figure ',
figureCount,
':** Variance versus averaged gene expression for overall sample.',
names(SampleList)[i],
'. ',
'Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. ',
'An interactive version of this figure can be found ', link, '. '
)
NewChnk <- paste0(
"#### ",tag,
"\n```{r varplot_",tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
## Histogram Part C done ##
###########################################################################
chnkVec <- c(
chnkVec,
NewChnk
)
figureCount <- figureCount + 1
print(" All variation done.")
## Now the individual samples ##
xmax <- ceiling(max(dfVar$Avg.Expression))
ymax <- ceiling(max(dfVar$variance.standardized))
dfVarRes <- unique(
dfVar[,c("gene", "Avg.Expression", "variance.standardized")]
)
names(dfVarRes) <- gsub(
"Avg.Expression", paste0(tag, "_AvgExpr"), names(dfVarRes)
)
names(dfVarRes) <- gsub(
"variance.standardized", paste0(tag, "_var_std"), names(dfVarRes)
)
## Make variation plot for individual samples ##
if (length(SampleList) > 1){
for (i in 1:length(SampleList)){
tag <- paste0("Ind_var_", names(SampleList)[i])
DefaultAssay(SampleList[[i]]) <- "RNA"
SampleList[[i]] <- FindVariableFeatures(object = SampleList[[i]])
SampleList[[i]] <- FindVariableFeatures(
object = SampleList[[i]],
selection.method = 'vst',
nfeatures = 2000
)
# Identify the 10 most highly variable genes
label2000 <- paste0(names(SampleList)[i], "_", "top2000var")
Obio@dataTableList$referenceList[[ label2000]]<- head(
x = VariableFeatures(object = SampleList[[i]]),
2000
)
label30 <- paste0(names(SampleList)[i], "_", "top30var")
Obio@dataTableList$referenceList[[ label30]]<- head(
x = VariableFeatures(object = SampleList[[i]]),
30
)
## slot for variable features OsC@assays$RNA@var.features
dfVar <- SampleList[[i]]@assays$RNA@meta.features
names(dfVar) <- gsub("vst.", "",names(dfVar))
dfVar[["gene"]] <- row.names(dfVar)
Idents(SampleList[[i]]) <- "sampleID"
cluster.averages <- AverageExpression(
SampleList[[i]],
return.seurat = TRUE
)
dfAvgExpr <- data.frame(cluster.averages[["RNA"]]@data)
dfAvgExpr[["gene"]] <- row.names(dfAvgExpr)
names(dfAvgExpr)[1] <- "Avg.Expression"
dfVar <- merge(
dfVar,
dfAvgExpr,
by.x = "gene",
by.y = "gene"
)
dfVar[["Type"]] <- "Standard"
dfVar[dfVar$gene %in% SampleList[[i]]@assays$RNA@var.features, "Type"] <- "Most Variable"
dfVar[["text"]] <- ""
dfVar[dfVar$gene %in% as.vector(Obio@dataTableList$referenceList[[label30]]), "text"] <- dfVar[dfVar$gene %in% as.vector(Obio@dataTableList$referenceList[[label30]]), "gene"]
# dotsize <- 0.5
library(ggrepel)
plotList[[tag]] <- ggplot(
data = dfVar,
aes(
x=Avg.Expression,
y=variance.standardized, label = text, color = Type)
) + geom_point( shape=16, size = dotsize
) + xlab("Average Expression") + ylab("Variance Standarized"
) + theme_bw(
) + theme(
axis.text.y = element_text(size=8),
axis.text.x = element_text(size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12)
) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
) + ggtitle(paste0("Individual variance in the ", names(SampleList)[i], " sample.")
) + scale_color_manual(values=c("#FF0000", "#000000")
) + geom_text_repel(
) + xlim(0, xmax) + ylim(0, ymax
)
###########################################################################
## Save plot to file ##
FNbase <- paste0("Individual.var.features",names(SampleList)[i], VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
## ##
###########################################################################
link <- paste0('<a href="https://',urlString,'/',Obio@parameterList$project_id,'/scatterplot?x_axis=',paste0(tag, '_AvgExpr'),'&y_axis=',paste0(tag, '_var_std" target="_blank">here</a>'))
figCap <- paste0(
'**Figure ',
figureCount,
':** Variance versus averaged gene expression for sample ',
names(SampleList)[i],
'. ',
'Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. ',
'An interactive version of this figure can be found ', link, '. '
)
NewChnk <- paste0(
"#### ",tag,
"\n```{r varplot_",tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
## Histogram Part C done ##
###########################################################################
chnkVec <- c(
chnkVec,
NewChnk
)
figureCount <- figureCount + 1
print(paste0(names(SampleList)[i], " individual variation done."))
## Add to result table ##
dfVarTemp <- unique(
dfVar[,c("gene", "Avg.Expression", "variance.standardized")]
)
names(dfVarTemp) <- gsub(
"Avg.Expression", paste0(tag, "_AvgExpr"), names(dfVarTemp)
)
names(dfVarTemp) <- gsub(
"variance.standardized", paste0(tag, "_var_std"), names(dfVarTemp)
)
dfVarRes <- merge(
dfVarRes,
dfVarTemp,
by.x = "gene",
by.y = "gene",
all = TRUE
)
dfVarRes[is.na(dfVarRes)] <- 0
}
} # end if multiple samples
## Make sure summary goes first ##
Obio@dataTableList[["dfVariation"]] <- data.frame(NULL)
Obio@dataTableList[["dfVariation"]] <- dfVarRes
if (length(plotList) > 3){
tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
tabVar <- ".tabset .tabset-fade .tabset-pills"
}
## plot list will be integrated in full figure ##
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))
Figure 1: Variance versus averaged gene expression for overall sample.PBMC10X. Download a pdf of this figure here. An interactive version of this figure can be found here.
###############################################################################
## Perform integrated analysis ##
if (length(Obio@sampleDetailList) > 1){
DefaultAssay(OsC) <- "integrated"
} else {
Obio@parameterList$singleCellClusterString <- gsub("integrated", "RNA", Obio@parameterList$singleCellClusterString)
DefaultAssay(OsC) <- "RNA"
}
# Run the standard workflow for visualization and clustering
## This will scale on the most variable features only
OsC <- ScaleData(OsC, verbose = FALSE)
OsC <- RunPCA(
OsC,
npcs = Obio@parameterList$singleCellSeuratNpcs4PCA, verbose = FALSE
)
# t-SNE and Clustering
## Add PCA clusters to data collection ##
OsC <- RunUMAP(OsC, reduction = "pca", dims = 1:20)
OsC <- RunTSNE(OsC, reduction = "pca", dims = 1:20)
OsC <- FindNeighbors(OsC, reduction = "pca", dims = 1:20)
OsC <- FindClusters(OsC, resolution = Obio@parameterList$singleCellClusterParameter)
## Rational: Run PCA on variable features, then scale data for heatmaps and other applications
if (length(Obio@sampleDetailList) > 1){
DefaultAssay(OsC) <- Obio@parameterList$scIntegrationMethod
allGenes <- rownames(x = OsC@assays[[Obio@parameterList$scIntegrationMethod]])
OsC <- ScaleData(OsC, verbose = FALSE, features=allGenes)
}
DefaultAssay(OsC) <- "RNA"
allGenes <- rownames(x = OsC@assays$RNA)
OsC <- ScaleData(OsC, verbose = FALSE, features=allGenes)
## Save minimal Seurat object ##
save(OsC,
file = paste0(
Obio@parameterList$localWorkDir,
Obio@parameterList$project_id,
".Seurat.Robj"
)
)
###############################################################################
## Add custom cluster annotation if specified ##
FNcol <- paste0(getwd(), "/design/customClusterAnnotation.txt")
tableValid <- FALSE
if (file.exists(FNcol)){
dfClusterAnnotation <- read.delim(
FNcol,
header = T,
sep = "\t",
stringsAsFactors = F
)
## Check column names ##
pos1 <- grep("seurat_clusters", names(dfClusterAnnotation))
pos2 <- grep("clusterName", names(dfClusterAnnotation))
pos3 <- grep("clusterColor", names(dfClusterAnnotation))
## Check if clusters match ##
inClust <- unique(as.character(dfClusterAnnotation$seurat_clusters))
oClust <- unique(as.character(OsC@meta.data$seurat_clusters))
if (length(pos1) != 0 & length(pos2) != 0 & length(pos3) != 0 ){
## Remove spaces ##
dfClusterAnnotation$clusterName <- gsub(" ", "_", dfClusterAnnotation$clusterName)
dfClusterAnnotation$clusterName <- gsub("\\.", "_", dfClusterAnnotation$clusterName)
dfClusterAnnotation$clusterName <- gsub("-", "_", dfClusterAnnotation$clusterName)
dfClusterAnnotation$clusterName <- gsub("__", "_", dfClusterAnnotation$clusterName)
dfClusterAnnotation$clusterName <- gsub("_$", "", dfClusterAnnotation$clusterName)
## Check if clusters match ##
inClust <- unique(as.character(dfClusterAnnotation$seurat_clusters))
oClust <- unique(as.character(OsC@meta.data$seurat_clusters))
if (length(inClust) == length(intersect(inClust, oClust))){
tableValid <- TRUE
} else {
tableValid <- FALSE
}
} else {
tableValid <- FALSE
}
}
if (!file.exists(FNcol) | !tableValid){
dfClusterAnnotation <- OsC@meta.data[,c("cellID", "seurat_clusters")]
levels <- as.vector(sort(unique(dfClusterAnnotation$seurat_clusters)))
clusterOrder <- as.numeric(sort(unique(dfClusterAnnotation$seurat_clusters)))
library(scales)
clusterCols <- hue_pal()(length(levels))
dfClusterAnnotation <- data.frame(
seurat_clusters = levels,
clusterName = paste0("C", levels),
clusterColor = clusterCols,
clusterOrder = clusterOrder,
stringsAsFactors = F
)
#row.names(dfClusterAnnotation) <- dfClusterAnnotation$cellID
#dfClusterAnnotation$cellID <- NULL
## Add sample colors ##
annoDir <- paste0(getwd(), "/design")
if (!dir.exists(annoDir)){
dir.create(annoDir)
}
FNcol <- paste0(annoDir, "/clusterAnnotation.txt")
write.table(
dfClusterAnnotation,
FNcol,
row.names=F,
sep = "\t"
)
}
## Make sure dfClusterAnnotation has a clusterOrder column
pos <- grep("^clusterOrder$", names(dfClusterAnnotation))
if (length(pos) ==1){
dfClusterAnnotation <- dfClusterAnnotation[order(dfClusterAnnotation$clusterOrder, decreasing = F),]
orderVec <- dfClusterAnnotation$clusterName
Obio@parameterList[["clusterNameOrder"]] <- orderVec
} else {
###############################################################################
## Order average expression by cluster similarity (most variable genes) ##
assay <- NULL
assay <- assay %||% DefaultAssay(object = OsC)
features <- VariableFeatures(object = OsC)
features <- intersect(x = features, y = rownames(x = OsC))
data.avg <- AverageExpression(object = OsC, assays = assay,
features = features, slot = "data", verbose = T)[[1]]
if (ncol(data.avg) > 1){
data.dist <- dist(x = t(x = data.avg[features, ]))
dforder <- hclust(d = data.dist)
orderVec <- names(dfAvgExpr)[dforder$order]
} else {
orderVec <- names(data.avg)
}
Obio@parameterList[["clusterNameOrder"]] <- orderVec
}
###############################################################################
## Merge into OsC@meta.data ##
dfAdd <- OsC@meta.data[,c("seurat_clusters", "cellID")]
dfAdd$seurat_clusters <- as.character(dfAdd$seurat_clusters)
dfClusterAnnotation$seurat_clusters <- as.character(dfClusterAnnotation$seurat_clusters)
dfClusterAnnotation$clusterOrder <- NULL
dfAdd <- merge(
dfAdd,
dfClusterAnnotation,
by.x = "seurat_clusters",
by.y = "seurat_clusters"
)
dfAdd <- data.frame(dfAdd, stringsAsFactors = F)
row.names(dfAdd) <- dfAdd$cellID
dfAdd$cellID <- NULL
dfAdd$seurat_clusters <- NULL
OsC <- addDf2seuratMetaData(
obj = OsC,
dfAdd = dfAdd
)
Idents(OsC) <- "clusterName"
Idents(OsC) <- factor(Idents(OsC), levels = Obio@parameterList[["clusterNameOrder"]])
#OsC@meta.data$seurat_clusters <- factor(OsC@meta.data$seurat_clusters, levels = c(unique(as.numeric(dfClusterAnnotation$seurat_clusters))))
## Done ##
###############################################################################
###############################################################################
## Add custom sample annotation if specified ##
FNsample <- paste0(getwd(), "/design/customSampleAnnotation.txt")
tableValid <- FALSE
if (file.exists(FNsample)){
dfSampleAnnotation <- read.delim(
FNsample,
header = T,
sep = "\t",
stringsAsFactors = F
)
## Check column names ##
pos1 <- grep("sampleID", names(dfSampleAnnotation))
pos2 <- grep("sampleName", names(dfSampleAnnotation))
pos3 <- grep("sampleColor", names(dfSampleAnnotation))
if (length(pos1) != 0 & length(pos2) != 0 & length(pos3) != 0){
## Edit sample annotation ##
dfSampleAnnotation$sampleName <- gsub(" ", "_", dfSampleAnnotation$sampleName)
dfSampleAnnotation$sampleName <- gsub("\\.", "_", dfSampleAnnotation$sampleName)
dfSampleAnnotation$sampleName <- gsub("-", "_", dfSampleAnnotation$sampleName)
dfSampleAnnotation$sampleName <- gsub("__", "_", dfSampleAnnotation$sampleName)
dfSampleAnnotation$sampleName <- gsub("_$", "", dfSampleAnnotation$sampleName)
## Check if clusters match ##
inClust <- unique(as.vector(dfSampleAnnotation$sampleID))
oClust <- unique(as.vector(OsC@meta.data$sampleID))
if (length(inClust) == length(intersect(inClust, oClust))){
tableValid <- TRUE
} else {
tableValid <- FALSE
}
} else {
tableValid <- FALSE
}
}
if (!file.exists(FNsample) | !tableValid){
dfSampleAnnotation <- OsC@meta.data[,c("cellID", "sampleID")]
levels <- sort(unique(dfSampleAnnotation$sampleID))
library(scales)
sampleCols <- hue_pal()(length(levels))
dfSampleAnnotation <- data.frame(
sampleID = levels,
sampleName = levels,
sampleColor = sampleCols
)
#row.names(dfSampleAnnotation) <- dfSampleAnnotation$cellID
#dfClusterAnnotation$cellID <- NULL
## Add sample colors ##
annoDir <- paste0(getwd(), "/design")
if (!dir.exists(annoDir)){
dir.create(annoDir)
}
FNsample <- paste0(annoDir, "/sampleAnnotation.txt")
write.table(
dfSampleAnnotation,
FNsample,
row.names=F,
sep = "\t"
)
}
###############################################################################
## Merge into OsC@meta.data ##
dfAdd <- OsC@meta.data[,c("cellID", "sampleID")]
row.names(dfAdd) <- dfAdd$cellID
dfAdd <- merge(
dfAdd,
dfSampleAnnotation,
by.x = "sampleID",
by.y = "sampleID"
)
row.names(dfAdd) <- dfAdd$cellID
dfAdd$cellID <- NULL
dfAdd$sampleID <- NULL
OsC <- addDf2seuratMetaData(
obj = OsC,
dfAdd = dfAdd
)
## Done ##
###############################################################################
###############################################################################
## Find all markers ##
DefaultAssay(OsC) <- "RNA"
Idents(OsC) <- "clusterName"
lgFCthreshold <- 0.25
dfGeneralMarkers <- FindAllMarkers(
object = OsC,
only.pos = FALSE,
min.pct = 0.1,
logfc.threshold = lgFCthreshold,
test.use = "roc",
assay = "RNA",
slot = "data"
)
if (nrow(dfGeneralMarkers) == 0 | length(unique(dfGeneralMarkers$cluster)) < 2){
lgFCthreshold <- 0.00
dfGeneralMarkers <- FindAllMarkers(
object = OsC,
only.pos = FALSE,
min.pct = 0.01,
logfc.threshold = lgFCthreshold,
test.use = "roc",
assay = "RNA",
slot = "data"
)
}
if (nrow(dfGeneralMarkers) > 0 & length(unique(dfGeneralMarkers$cluster)) > 2){
dfGeneralMarkers[["direction"]] <- ""
dfGeneralMarkers[dfGeneralMarkers$avg_diff >= 0, "direction"] <- "positive"
dfGeneralMarkers[dfGeneralMarkers$avg_diff < 0, "direction"] <- "negative"
Obio@dataTableList[["dfGeneralMarkers"]] <- dfGeneralMarkers
dfGeneralMarkersFilt <- dfGeneralMarkers[dfGeneralMarkers$avg_diff > lgFCthreshold | dfGeneralMarkers$avg_diff < -lgFCthreshold,]
Obio@dataTableList[["dfGeneralMarkersFilt"]] <- dfGeneralMarkersFilt
dfGeneralMarkersPos <- dfGeneralMarkers[dfGeneralMarkers$direction == "positive", ]
dfTop1 <- data.frame(dfGeneralMarkersPos %>% group_by(cluster) %>% top_n(1, avg_diff))
dfTop5 <- data.frame(dfGeneralMarkersPos %>% group_by(cluster) %>% top_n(5, avg_diff))
dfTop10 <- data.frame(dfGeneralMarkersPos %>% group_by(cluster) %>% top_n(10, avg_diff))
dfTop50 <- data.frame(dfGeneralMarkersPos %>% group_by(cluster) %>% top_n(50, avg_diff))
Obio@dataTableList[["dfGeneralMarkersTop10"]] <- dfTop10
Obio@dataTableList$referenceList[["Top10clusterMarkers"]] <- as.vector(
unique(
dfTop10$gene
)
)
###############################################################################
## Upload general markers ##
library(tidyverse)
dfDat <- dfGeneralMarkers
dfDat <- unique(dfGeneralMarkers[,c("gene", "cluster")])
if (Obio@parameterList$geneIDcolumn != "hgnc_symbol" & Obio@parameterList$geneIDcolumn != "mgi_symbol"){
refGeneIDcolumn <- "hgnc_symbol"
dfAnno <- Obio@dfGeneAnnotation
dfAnno <- unique(dfAnno[,c("hgnc_symbol",Obio@parameterList$geneIDcolumn )])
dfAnno <- dfAnno[dfAnno[,Obio@parameterList$geneIDcolumn] %in% dfDat[,"gene"],]
dfDat <- merge(
dfDat,
dfAnno,
by.x = "gene",
by.y = Obio@parameterList$geneIDcolumn
)
dfDat$gene <- NULL
names(dfDat) <- gsub("hgnc_symbol", "gene",names(dfDat))
} else {
refGeneIDcolumn <- Obio@parameterList$geneIDcolumn
}
if (ncol(dfDat) > 1){
library(dplyr)
dfDat <- dfDat %>%
group_by(cluster) %>%
mutate(rn = row_number()) %>%
ungroup %>%
pivot_wider(names_from = cluster, values_from = gene, values_fill = "") %>% dplyr::select(-rn)
orderVec <- sort(names(dfDat))
dfDat <- data.frame(dfDat[,orderVec])
names(dfDat) <- paste0(
Obio@parameterList$project_id,
"_Marker_Genes_Cluster_",
names(dfDat)
)
dfDat <- data.frame(dfDat)
## Insert description row ##
descriptionRow <- data.frame(dfDat[1,])
descriptionRow[1,] <- t(names(dfDat))
descriptionRow[1,] <- paste0(descriptionRow[1,], " from Seurat FindAllMarkers.")
dfDat <- rbind(
descriptionRow,
dfDat
)
# dfCatRef <- names(dfCatRef)
# names(dfCatRef) <- names(dfDat)
#
# dfDat <- rbind(
# dfCatRef,
# dfDat
# )
#######################################################################
## Upload/update category by category ##
updatedCatIDs <- as.vector(NULL, mode = "character")
updatedCatNames <- as.vector(NULL, mode = "character")
for (i in 1:ncol(dfDat)){
cat.name <- names(dfDat)[i]
cat_type <- paste0("temp_cluster_marker_", Obio@parameterList$project_id)
cat.description.text <- as.vector(dfDat[1,i])
gene.vec <- as.vector(
dfDat[,i]
)[2:nrow(dfDat)]
gene.vec <- gene.vec[gene.vec != ""]
gene.vec <- sort(na.omit(gene.vec))
## Determine if cat exists ##
catID <- add.category.to.lab.reference.table.hs(
host = Obio@dbDetailList$host,
pwd = db.pwd,
user = Obio@dbDetailList$db.user,
cat.ref.db = Obio@dbDetailList$ref.cat.db,
cat.ref.db.table = Obio@parameterList$lab.categories.table,
gene.vector = gene.vec,
gene.id = refGeneIDcolumn, #options hgnc_symbol, mgi_symbol
mm.hs.conversion.file = "assets/annotation/homologene.data.txt",
cat_name = cat.name,
cat_type = cat_type,
data_source = paste0(Obio@parameterList$labname, " Lab"),
comments_1 = "",
comments_2 = "",
new.lab.category.table = FALSE,
cat.description.db = "internal_categories",
cat.description.db.table = "category_description",
cat.description.text = cat.description.text,
lab.name = Obio@parameterList$labname,
replaceExistingCatName = TRUE
)
updatedCatIDs <- c(
updatedCatIDs,
catID
)
updatedCatNames <- c(
updatedCatNames,
cat.name
)
} ## End dfDat loop
dfFAMplotIDs <- data.frame(
cat_id = updatedCatIDs,
cat_name = updatedCatNames,
stringsAsFactors = F
)
}
## Done uploading general markers ##
###############################################################################
} else {
Obio@dataTableList[["dfGeneralMarkers"]] <- NULL
}
#############################################################
If you wish to get a bit of background on tSNE dimensionality reduction, take a look at this youtube video by Josh Starmer from the University of North Carolina.
If you wish to get a bit of background on UMAP (and other) dimensionality reduction algorithms, take a look at this youtube video by recaping a lecture at the PyData 2018 conference.
reductionVec <- c("umap", "tsne")
plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")
###############################################################################
## First UMAP all samples together ##
tag <- paste0("UMAP_sample_level")
dfPlot <- OsC@meta.data
pos <- grep("included", names(dfPlot))
if (length(pos) == 0){
dfPlot[["included"]] <- "+"
}
pos <- grep("cellID", names(dfPlot))
if (length(pos) == 0){
dfPlot[["cellID"]] <- row.names(dfPlot)
}
dfPlot$UMAP_1 <- NULL
dfPlot$UMAP_2 <- NULL
## Get UMAP coordinates ##
coord <- data.frame(OsC@reductions$umap@cell.embeddings)
coord[["cellID"]] <- row.names(coord)
coord <-coord[coord$cellID %in% dfPlot$cellID, ]
dfPlot <- merge(dfPlot, coord, by.x = "cellID", by.y="cellID", all=T)
dfPlot[is.na(dfPlot)] <- 0
dfPlot <- dfPlot[dfPlot$UMAP_1 != 0 & dfPlot$UMAP_2 != 0,]
## Add cluster colors ##
dfPlot[["Cluster"]] <- dfPlot$sampleName
clusterVec <- as.vector(sort(unique(dfPlot$sampleName)))
maxX <- 1.1*max(dfPlot$UMAP_1, na.rm = T)
minX <- 1.1*min(dfPlot$UMAP_1, na.rm = T)
maxY <- 1.1*max(dfPlot$UMAP_2, na.rm = T)
minY <- 1.1*min(dfPlot$UMAP_2, na.rm = T)
dfPair <- unique(OsC@meta.data[,c("sampleName","sampleID", "sampleColor")])
row.names(dfPair) <- dfPair$sampleID
#colVec <- dfPair$sampleColor
#names(colVec) <- dfPair$sampleName
dfPlot$Cluster <- factor(dfPlot$Cluster, levels = dfPair$sampleName)
# dotsize = 1
# if (nrow(dfPlot) > 10000){
# dotsize = 0.75
# } else if (nrow(dfPlot) > 20000){
# dotsize = 0.5
# } else if (nrow(dfPlot) > 50000){
# dotsize = 0.25
# }
plotList[[tag]] <- ggplot(
data=dfPlot[dfPlot$included == "+",],
aes(UMAP_1, UMAP_2, color=sampleName)
) + geom_point( shape=16, size = as.numeric(dotsize)
) + xlab("UMAP1") + ylab("UMAP2"
) + theme_bw(
) + theme(
axis.text.y = element_text(size=8),
axis.text.x = element_text(size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12),
legend.title = element_blank()
) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
) + ggtitle(paste0("Sample: ", gsub("_", " ", tag))
) + xlim(minX, maxX) + ylim(minY, maxY
) + coord_fixed(ratio=1
)
h <- sum(c("sampleName", "sampleColor") %in% names(dfPlot))
if (h ==2){
dfCol <- unique(dfPlot[,c("sampleName", "sampleColor")])
colVec <- as.vector(dfCol$sampleColor)
names(colVec) <- as.vector(dfCol$sampleName)
plotList[[tag]] <- plotList[[tag]] + scale_colour_manual("Samples" ,values = colVec
) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
)
}
## Add colors if specified ##
if (length(unique(dfPlot$Cluster)) > 15){
plotList[[tag]] <- plotList[[tag]] + theme(legend.position = "none")
}
FNbase <- paste0(tag, VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
link <- paste0('<a href="https://',urlString,'/',Obio@parameterList$project_id,'/pca?x_axis=UMAP_1&y_axis=UMAP_2" target="_blank">here</a>')
figLegend <- paste0(
'**Figure ',
figureCount,
':** ',
' UMAP showing all cells from all samples together. Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>.',
'An interactive version of this figure can be found ', link, '. '
)
figureCount <- figureCount + 1
NewChnk <- paste0(
"#### ", tag,
"\n```{r Sample_UMAP_",
tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
## Done first umap all samples ##
###############################################################################
###############################################################################
## First tsne all samples together ##
tag <- paste0("tSNE_All_Samples")
dfPlot <- OsC@meta.data
pos <- grep("included", names(dfPlot))
if (length(pos) == 0){
dfPlot[["included"]] <- "+"
}
pos <- grep("cellID", names(dfPlot))
if (length(pos) == 0){
dfPlot[["cellID"]] <- row.names(dfPlot)
}
dfPlot$tSNE_1 <- NULL
dfPlot$tSNE_2 <- NULL
## Get tsNE coordinates ##
coord <- data.frame(OsC@reductions$tsne@cell.embeddings)
coord[["cellID"]] <- row.names(coord)
coord <-coord[coord$cellID %in% dfPlot$cellID, ]
dfPlot <- merge(dfPlot, coord, by.x = "cellID", by.y="cellID", all=T)
dfPlot[is.na(dfPlot)] <- 0
dfPlot <- dfPlot[dfPlot$tSNE_1 != 0 & dfPlot$tSNE_2 != 0,]
## Add cluster colors ##
dfPlot[["Cluster"]] <- dfPlot$sampleName
clusterVec <- as.vector(unique(sort(dfPlot$sampleName)))
maxX <- 1.1*max(dfPlot$tSNE_1, na.rm = T)
minX <- 1.1*min(dfPlot$tSNE_1, na.rm = T)
maxY <- 1.1*max(dfPlot$tSNE_2, na.rm = T)
minY <- 1.1*min(dfPlot$tSNE_2, na.rm = T)
# library(scales)
# clusterCols = hue_pal()(length(clusterVec))
# dfPlot$Cluster <- factor(dfPlot$Cluster, levels = clusterVec)
# dotsize = 1.5
# if (nrow(dfPlot) > 10000){
# dotsize = 0.75
# } else if (nrow(dfPlot) > 50000){
# dotsize = 0.5
# } else {
# dotsize = 0.25
# }
plotList[[tag]] <- ggplot(data=dfPlot[dfPlot$included == "+",], aes(tSNE_1, tSNE_2, color=sampleName)
) + geom_point( shape=16, size = as.numeric(dotsize)
) + xlab("tSNE1") + ylab("tSNE2"
) + theme_bw(
) + theme(
axis.text.y = element_text(size=8),
axis.text.x = element_text(size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12),
legend.title = element_blank()
) + ggtitle(paste0("Sample: ", tag)
) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
) + xlim(minX, maxX) + ylim(minY, maxY
) + coord_fixed(ratio=1
)
h <- sum(c("sampleName", "sampleColor") %in% names(dfPlot))
if (h ==2){
dfCol <- unique(dfPlot[,c("sampleName", "sampleColor")])
colVec <- as.vector(dfCol$sampleColor)
names(colVec) <- as.vector(dfCol$sampleName)
plotList[[tag]] <- plotList[[tag]] + scale_colour_manual("Samples" ,values = colVec
) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
)
}
if (length(unique(dfPlot$Cluster)) > 15){
plotList[[tag]] <- plotList[[tag]] + theme(legend.position = "none")
}
FNbase <- paste0(tag, VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
link <- paste0('<a href="https://',urlString,'/',Obio@parameterList$project_id,'/pca?x_axis=tSNE_1&y_axis=tSNE_2" target="_blank">here</a>')
figLegend <- paste0(
'**Figure ',
figureCount,
':** ',
' tSNE showing all cells from all samples together. Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>.',
'An interactive version of this figure can be found ', link, '. '
)
figureCount <- figureCount + 1
NewChnk <- paste0(
"#### ", tag,
"\n```{r Sample_tSNE_",
tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
## Done first tsne all samples ##
###############################################################################
###############################################################################
## Make one UMAP plot per sample ##
sampleVec <- sort(unique(OsC@meta.data$sampleName))
dfPlot <- OsC@meta.data
pos <- grep("included", names(dfPlot))
if (length(pos) == 0){
dfPlot[["included"]] <- "+"
}
pos <- grep("cellID", names(dfPlot))
if (length(pos) == 0){
dfPlot[["cellID"]] <- row.names(dfPlot)
}
## Get UMAP coordinates ##
coord <- data.frame(OsC@reductions$umap@cell.embeddings)
coord[["cellID"]] <- row.names(coord)
coord <-coord[coord$cellID %in% dfPlot$cellID, ]
dfPlot$UMAP_1 <- NULL
dfPlot$UMAP_2 <- NULL
dfPlot <- merge(dfPlot, coord, by.x = "cellID", by.y="cellID", all=T)
dfPlot[is.na(dfPlot)] <- 0
dfPlot <- dfPlot[dfPlot$UMAP_1 != 0 & dfPlot$UMAP_2 != 0,]
## Add cluster colors ##
dfPlot[["Cluster"]] <- dfPlot$sampleID
clusterVec <- as.vector(unique(sort(dfPlot$sampleName)))
# library(scales)
# clusterCols = hue_pal()(length(clusterVec))
#
# dfPlot$Cluster <- factor(dfPlot$Cluster, levels = clusterVec)
maxX <- 1.1*max(dfPlot$UMAP_1, na.rm = T)
minX <- 1.1*min(dfPlot$UMAP_1, na.rm = T)
maxY <- 1.1*max(dfPlot$UMAP_2, na.rm = T)
minY <- 1.1*min(dfPlot$UMAP_2, na.rm = T)
for (i in 1:length(sampleVec)){
tag <- paste0("UMAP_plot_by_", sampleVec[i])
dfPlotSel <- dfPlot[dfPlot$sampleName == sampleVec[i], ]
dfPlotSel$sampleName
plotList[[tag]] <- ggplot(data=dfPlotSel[dfPlotSel$included == "+",], aes(UMAP_1, UMAP_2, color=sampleName)
) + geom_point( shape=16, size = as.numeric(dotsize)
) + xlab("UMAP1") + ylab("UMAP2"
) + theme_bw(
) + theme(
axis.text.y = element_text(size=8),
axis.text.x = element_text(size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12),
legend.title = element_blank()
) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
) + ggtitle(paste0("Sample: ", gsub("_", " ", tag))
) + xlim(minX, maxX) + ylim(minY, maxY
) + coord_fixed(ratio=1)
#) + scale_color_manual(values = clusterCols[i]
h <- sum(c("sampleName", "sampleColor") %in% names(dfPlotSel))
if (h ==2){
dfCol <- unique(dfPlotSel[,c("sampleName", "sampleColor")])
colVec <- as.vector(dfCol$sampleColor)
names(colVec) <- as.vector(dfCol$sampleName)
plotList[[tag]] <- plotList[[tag]] + scale_colour_manual("Samples" ,values = colVec
) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
)
}
if (length(unique(dfPlot$Cluster)) > 15){
plotList[[tag]] <- plotList[[tag]] + theme(legend.position = "none")
}
FNbase <- paste0(tag, VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
figLegend <- paste0(
'**Figure ',
figureCount,
':** ',
' Sample-level UMAPs. Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>.'
)
figureCount <- figureCount + 1
NewChnk <- paste0(
paste("#### ", tag),
"\n```{r Sample_UMAP_",
tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
}
## Done making one umap plot per sample ##
###############################################################################
###############################################################################
## Add cluster dendrogram by sample ##
if (length(unique(OsC@meta.data$sampleID)) > 2){
library(ggtree)
Idents(OsC) <- "sampleName"
OsC <- BuildClusterTree(OsC)
tag <- paste0("Sample_Dendrogram")
OsC@tools$BuildClusterTree$tip.label <- paste0( OsC@tools$BuildClusterTree$tip.label)
plotList[[tag]] <- ggplot(OsC@tools$BuildClusterTree
) + geom_tree(
) + theme_tree(
)
h <- sum(c("sampleName", "sampleColor") %in% names(OsC@meta.data))
if (h ==2){
dfCol <- unique(OsC@meta.data[,c("sampleName", "sampleColor")])
colVec <- as.vector(dfCol$sampleColor)
names(colVec) <- as.vector(dfCol$sampleName)
plotList[[tag]] <- plotList[[tag]] + geom_tiplab(color=colVec
)
} else {
plotList[[tag]] <- plotList[[tag]] + geom_tiplab(
)
}
#plotList[[tag]] <- plotList[[tag]] + geom_tippoint(aes(color=sampleName), size=1.5)
plotList[[tag]] <- plotList[[tag]] + labs(title=tag
) + theme(
panel.border = element_rect(colour = "black", fill=NA, size=1),
axis.title.x=element_blank(),
plot.title = element_text(hjust = 0.5, size = 12)
) + xlim(0,dendrofactor*max(OsC@tools$BuildClusterTree[[2]]))
## Save to file ##
FNbase <- paste0(tag,".", VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
figLegend <- paste0(
'**Figure ',
figureCount,
':** ',
' Clusterplot dendrogram by sample ID. ','A pdf of this figure can be downloaded <a href="',FNrel,'" target="_blank">here</a>.'
)
NewChnk <- paste0(
"#### SampleID Dendrogram",
"\n```{r ", tag, "results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
figureCount <- figureCount + 1
}
## Done by sample ##
###############################################################################
if (length(plotList) > 2){
tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
tabVar <- ".tabset .tabset-fade .tabset-pills"
}
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))
Figure 4: Sample-level UMAPs. Download a pdf of this figure here.
## Ensure that hmIdent exists ##
## Want to average by cluster and subset by sampleID ##
###############################################################################
## Calculataing average expression across all samples ##
OsC@meta.data[["all"]] <- "all"
Idents(OsC) <- "all"
cluster.averages <- AverageExpression(OsC, return.seurat = TRUE)
## Retrieved Scaled data ##
dfAvgExpr <- data.frame(cluster.averages[["RNA"]]@data)
#dfAvgExpr <- dfAvgExpr[,sort(names(dfAvgExpr))]
dfAvgExpr[["gene"]] <- row.names(dfAvgExpr)
dfAvgScaledData <- data.frame(cluster.averages[["RNA"]]@scale.data)
#dfAvgScaledData <- dfAvgScaledData[,sort(names(dfAvgScaledData))]
dfAvgScaledData[["gene"]] <- row.names(dfAvgScaledData)
Obio@dataTableList[["dfAvglg10ExprAll"]] <- dfAvgExpr
Obio@dataTableList[["dfAvglg10ExprAllScaled"]] <- dfAvgScaledData
## Done ##
###############################################################################
# OsC@meta.data[["clustIdent"]] <- paste0(
# "C_", OsC@meta.data[,"seurat_clusters"]
# )
## Order by Seurat cluster order for now ##
#OsC@meta.data <- OsC@meta.data[order(OsC@meta.data$seurat_clusters),]
#OsC@meta.data$clusterName <- factor(OsC@meta.data$clusterName , levels = c(unique(OsC@meta.data$clusterName)))
Idents(OsC) <- "clusterName"
#Idents(OsC) <- factor(Idents(OsC), levels = unique(levels(OsC)))
if (length(grep("sampleName", names(OsC@meta.data))) > 0){
OsC@meta.data[["cluster_sample"]] <- paste0(
OsC@meta.data$clusterName, "_", OsC@meta.data$sampleName
)
Idents(OsC) <- "cluster_sample"
Idents(OsC) <- factor(Idents(OsC), levels = c(unique(OsC@meta.data$cluster_sample)))
#OsC@meta.data$cluster_sample <- factor(OsC@meta.data$cluster_sample , levels = c(unique(OsC@meta.data$cluster_sample)))
#Idents(OsC) <- "cluster_sample"
cluster.averages <- AverageExpression(OsC, return.seurat = TRUE)
} else {
cluster.averages <- AverageExpression(OsC, return.seurat = TRUE)
}
#Idents(OsC) <- "sampleName"
## Retrieved Scaled data ##
dfAvgExpr <- data.frame(cluster.averages[["RNA"]]@data)
dfAvgExpr <- dfAvgExpr[,sort(names(dfAvgExpr))]
dfAvgExpr[["gene"]] <- row.names(dfAvgExpr)
dfAvgScaledData <- data.frame(cluster.averages[["RNA"]]@scale.data)
dfAvgScaledData <- dfAvgScaledData[,sort(names(dfAvgScaledData))]
dfAvgScaledData[["gene"]] <- row.names(dfAvgScaledData)
Obio@dataTableList[["dfAvglg10ExprByClusterBySample"]] <- dfAvgExpr
Obio@dataTableList[["dfAvglg10ExprByClusterBySampleScaled"]] <- dfAvgScaledData
###############################################################################
###############################################################################
## Average by Cluster ##
Idents(OsC) <- "clusterName"
cluster.averages <- AverageExpression(OsC, return.seurat = TRUE)
dfAvgExpr <- data.frame(cluster.averages[["RNA"]]@data)
dfAvgExpr <- data.frame(dfAvgExpr[,sort(names(dfAvgExpr))])
## Done ##
###############################################################################
dfAvgExpr[["gene"]] <- row.names(dfAvgExpr)
dfAvgScaledData <- data.frame(cluster.averages[["RNA"]]@scale.data)
dfAvgScaledData <- dfAvgScaledData[,sort(names(dfAvgScaledData))]
dfAvgScaledData[["gene"]] <- row.names(dfAvgScaledData)
Obio@dataTableList[["dfAvglg10ExprPerCluster"]] <- dfAvgExpr
Obio@dataTableList[["dfAvglg10ExprPerClusterScaled"]] <- dfAvgScaledData
## Done Average by Cluster ##
###############################################################################
###############################################################################
## Average gene expression by sample ##
Idents(OsC) <- "sampleName"
cluster.averages <- AverageExpression(OsC, return.seurat = TRUE)
## Retrieved Scaled data ##
dfAvgExpr <- data.frame(cluster.averages[["RNA"]]@data)
dfAvgExpr[["gene"]] <- row.names(dfAvgExpr)
selVec <- names(dfAvgExpr)
###############################################################################
## Order average expression by cluster similarity (most variable genes) ##
assay <- NULL
assay <- assay %||% DefaultAssay(object = OsC)
features <- VariableFeatures(object = OsC)
features <- intersect(x = features, y = rownames(x = OsC))
data.avg <- AverageExpression(object = OsC, assays = assay,
features = features, slot = "data", verbose = T)[[1]]
if (ncol(data.avg) > 1){
data.dist <- dist(x = t(x = data.avg[features, ]))
dforder <- hclust(d = data.dist)
orderVec <- names(dfAvgExpr)[dforder$order]
} else {
orderVec <- names(data.avg)
}
Obio@parameterList[["sampleIDOrder"]] <- orderVec
dfAvgExpr <- data.frame(dfAvgExpr[,c("gene",orderVec) ])
## Reset sample order by clustering ##
#OsC@meta.data$sampleID
## Done ##
###############################################################################
dfAvgScaledData <- data.frame(cluster.averages[["RNA"]]@scale.data)
selVec <- names(dfAvgScaledData)
dfAvgScaledData[["gene"]] <- row.names(dfAvgScaledData)
selVec <- c("gene", selVec)
dfAvgScaledData <- dfAvgScaledData[,selVec]
Obio@dataTableList[["dfAvglg10ExprBySample"]] <- dfAvgExpr
Obio@dataTableList[["dfAvglg10ExprBySampleScaled"]] <- dfAvgScaledData
## Done average gene expression by sample ##
###############################################################################
reductionVec <- c("umap", "tsne")
plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")
###############################################################################
## First UMAP all samples together ##
tag <- paste0("UMAP_All_Samples")
dfPlot <- OsC@meta.data
pos <- grep("included", names(dfPlot))
if (length(pos) == 0){
dfPlot[["included"]] <- "+"
}
dfPlot[["cellID"]] <- row.names(dfPlot)
dfPlot$UMAP_1 <- NULL
dfPlot$UMAP_2 <- NULL
## Get UMAP coordinates ##
coord <- data.frame(OsC@reductions$umap@cell.embeddings)
coord[["cellID"]] <- row.names(coord)
coord <-coord[coord$cellID %in% dfPlot$cellID, ]
dfPlot <- merge(dfPlot, coord, by.x = "cellID", by.y="cellID", all=T)
dfPlot[is.na(dfPlot)] <- 0
dfPlot <- dfPlot[dfPlot$UMAP_1 != 0 & dfPlot$UMAP_2 != 0,]
## Add cluster colors ##
dfPlot[["Cluster"]] <- dfPlot$clusterName
clusterVec <- Obio@parameterList$clusterNameOrder
dfPlot$Cluster <- factor(dfPlot$Cluster, levels = clusterVec)
maxX <- 1.1*max(dfPlot$UMAP_1, na.rm = T)
minX <- 1.1*min(dfPlot$UMAP_1, na.rm = T)
maxY <- 1.1*max(dfPlot$UMAP_2, na.rm = T)
minY <- 1.1*min(dfPlot$UMAP_2, na.rm = T)
library(scales)
clusterCols = hue_pal()(length(clusterVec))
# dotsize = 1
# if (nrow(dfPlot) > 10000){
# dotsize = 0.75
# } else if (nrow(dfPlot) > 20000){
# dotsize = 0.5
# } else if (nrow(dfPlot) > 50000){
# dotsize = 0.25
# }
plotList[[tag]] <- ggplot(data=dfPlot[dfPlot$included == "+",], aes(UMAP_1, UMAP_2, color=Cluster)
) + geom_point( shape=16, size = as.numeric(dotsize)
) + xlab("UMAP1") + ylab("UMAP2"
) + theme_bw(
) + theme(
axis.text.y = element_text(size=8),
axis.text.x = element_text(size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12),
legend.title = element_blank()
) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
) + ggtitle(paste0("Sample: ", gsub("_", " ", tag))
) + xlim(minX, maxX) + ylim(minY, maxY
) + coord_fixed(ratio=1
)
h <- sum(c("clusterName", "clusterColor") %in% names(dfPlot))
if (h ==2){
dfCol <- unique(dfPlot[,c("clusterName", "clusterColor")])
colVec <- as.vector(dfCol$clusterColor)
names(colVec) <- as.vector(dfCol$clusterName)
plotList[[tag]] <- plotList[[tag]] + scale_colour_manual("Clusters" ,values = colVec
) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
)
}
if (length(unique(dfPlot$Cluster)) > 15){
plotList[[tag]] <- plotList[[tag]] + theme(legend.position = "none")
}
FNbase <- paste0(tag, VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
link <- paste0('<a href="https://',urlString,'/',Obio@parameterList$project_id,'/pca?x_axis=UMAP_1&y_axis=UMAP_2" target="_blank">here</a>')
figLegend <- paste0(
'**Figure ',
figureCount,
':** ',
' UMAP showing all cells from all samples together. Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>.',
'An interactive version of this figure can be found ', link, '. '
)
figureCount <- figureCount + 1
NewChnk <- paste0(
"#### ", tag,
"\n```{r SL_UMAP_",
tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
## Done first umap all samples ##
###############################################################################
###############################################################################
## First tsne all samples together ##
tag <- paste0("tSNE_All_Samples")
dfPlot <- OsC@meta.data
pos <- grep("included", names(dfPlot))
if (length(pos) == 0){
dfPlot[["included"]] <- "+"
}
dfPlot[["cellID"]] <- row.names(dfPlot)
dfPlot$tSNE_1 <- NULL
dfPlot$tSNE_2 <- NULL
## Get tsNE coordinates ##
coord <- data.frame(OsC@reductions$tsne@cell.embeddings)
coord[["cellID"]] <- row.names(coord)
coord <-coord[coord$cellID %in% dfPlot$cellID, ]
dfPlot <- merge(dfPlot, coord, by.x = "cellID", by.y="cellID", all=T)
dfPlot[is.na(dfPlot)] <- 0
dfPlot <- dfPlot[dfPlot$tSNE_1 != 0 & dfPlot$tSNE_2 != 0,]
## Add cluster colors ##
dfPlot[["Cluster"]] <- dfPlot$clusterName
clusterVec <- Obio@parameterList$clusterNameOrder
dfPlot$Cluster <- factor(dfPlot$Cluster, levels = clusterVec)
maxX <- 1.1*max(dfPlot$tSNE_1, na.rm = T)
minX <- 1.1*min(dfPlot$tSNE_1, na.rm = T)
maxY <- 1.1*max(dfPlot$tSNE_2, na.rm = T)
minY <- 1.1*min(dfPlot$tSNE_2, na.rm = T)
library(scales)
clusterCols = hue_pal()(length(clusterVec))
dfPlot$Cluster <- factor(dfPlot$Cluster, levels = clusterVec)
# dotsize = 1.5
# if (nrow(dfPlot) > 10000){
# dotsize = 0.75
# } else if (nrow(dfPlot) > 50000){
# dotsize = 0.5
# } else {
# dotsize = 0.25
# }
dfPlot$clusterName <- factor(dfPlot$clusterName)
plotList[[tag]] <- ggplot(data=dfPlot[dfPlot$included == "+",], aes(tSNE_1, tSNE_2, color=Cluster)
) + geom_point( shape=16, size = as.numeric(dotsize)
) + xlab("tSNE1") + ylab("tSNE2"
) + theme_bw(
) + theme(
axis.text.y = element_text(size=8),
axis.text.x = element_text(size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12),
legend.title = element_blank()
) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
) + ggtitle(paste0("Sample: ", tag)
) + xlim(minX, maxX) + ylim(minY, maxY
) + coord_fixed(ratio=1
)
h <- sum(c("clusterName", "clusterColor") %in% names(dfPlot))
if (h ==2){
dfCol <- unique(dfPlot[,c("clusterName", "clusterColor")])
colVec <- as.vector(dfCol$clusterColor)
names(colVec) <- as.vector(dfCol$clusterName)
plotList[[tag]] <- plotList[[tag]] + scale_colour_manual("Clusters" ,values = colVec
) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
)
}
if (length(unique(dfPlot$Cluster)) > 15){
plotList[[tag]] <- plotList[[tag]] + theme(legend.position = "none")
}
FNbase <- paste0(tag, VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
link <- paste0('<a href="https://',urlString,'/',Obio@parameterList$project_id,'/pca?x_axis=tSNE_1&y_axis=tSNE_2" target="_blank">here</a>')
figLegend <- paste0(
'**Figure ',
figureCount,
':** ',
' tSNE showing all cells from all samples together. Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>.',
'An interactive version of this figure can be found ', link, '. '
)
figureCount <- figureCount + 1
NewChnk <- paste0(
"#### ", tag,
"\n```{r SL_tSNE_",
tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
## Done first tsne all samples ##
###############################################################################
###############################################################################
## Make one UMAP plot per sample ##
sampleVec <- sort(unique(OsC@meta.data$sampleID))
dfPlot <- OsC@meta.data
pos <- grep("included", names(dfPlot))
if (length(pos) == 0){
dfPlot[["included"]] <- "+"
}
dfPlot[["cellID"]] <- row.names(dfPlot)
## Get UMAP coordinates ##
coord <- data.frame(OsC@reductions$umap@cell.embeddings)
coord[["cellID"]] <- row.names(coord)
coord <-coord[coord$cellID %in% dfPlot$cellID, ]
dfPlot$UMAP_1 <- NULL
dfPlot$UMAP_2 <- NULL
dfPlot <- merge(dfPlot, coord, by.x = "cellID", by.y="cellID", all=T)
dfPlot[is.na(dfPlot)] <- 0
dfPlot <- dfPlot[dfPlot$UMAP_1 != 0 & dfPlot$UMAP_2 != 0,]
## Add cluster colors ##
dfPlot[["Cluster"]] <- dfPlot$clusterName
clusterVec <- as.vector(unique(sort(dfPlot$clusterName)))
library(scales)
clusterCols = hue_pal()(length(clusterVec))
dfPlot$Cluster <- factor(dfPlot$Cluster, levels = clusterVec)
maxX <- 1.1*max(dfPlot$UMAP_1, na.rm = T)
minX <- 1.1*min(dfPlot$UMAP_1, na.rm = T)
maxY <- 1.1*max(dfPlot$UMAP_2, na.rm = T)
minY <- 1.1*min(dfPlot$UMAP_2, na.rm = T)
for (i in 1:length(sampleVec)){
tag <- paste0("UMAP_plot_by_", sampleVec[i])
dfPlotSel <- dfPlot[dfPlot$sampleID == sampleVec[i], ]
plotList[[tag]] <- ggplot(data=dfPlotSel[dfPlotSel$included == "+",], aes(UMAP_1, UMAP_2, color=Cluster)
) + geom_point( shape=16, size = as.numeric(dotsize)
) + xlab("UMAP1") + ylab("UMAP2"
) + theme_bw(
) + theme(
axis.text.y = element_text(size=8),
axis.text.x = element_text(size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12),
legend.title = element_blank()
) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
) + ggtitle(paste0("Sample: ", gsub("_", " ", tag))
) + xlim(minX, maxX) + ylim(minY, maxY
) + coord_fixed(ratio=1
)
h <- sum(c("clusterName", "clusterColor") %in% names(dfPlotSel))
if (h ==2){
dfCol <- unique(dfPlotSel[,c("clusterName", "clusterColor")])
colVec <- as.vector(dfCol$clusterColor)
names(colVec) <- as.vector(dfCol$clusterName)
plotList[[tag]] <- plotList[[tag]] + scale_colour_manual("Clusters" ,values = colVec
) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
)
}
if (length(unique(dfPlot$Cluster)) > 15){
plotList[[tag]] <- plotList[[tag]] + theme(legend.position = "none")
}
FNbase <- paste0(tag, VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
figLegend <- paste0(
'**Figure ',
figureCount,
':** ',
' Sample-level UMAPs. Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>.'
)
figureCount <- figureCount + 1
NewChnk <- paste0(
paste("#### ", tag),
"\n```{r SL_UMAP_",
tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
}
## Done making one umap plot per sample ##
###############################################################################
###############################################################################
## Add cluster dendrogram by sample ##
if (length(unique(OsC@meta.data$sampleID)) > 3){
library(ggtree)
Idents(OsC) <- "sampleName"
OsC <- BuildClusterTree(OsC)
tag <- paste0("Sample_Dendrogram")
OsC@tools$BuildClusterTree$tip.label <- paste0( OsC@tools$BuildClusterTree$tip.label)
plotList[[tag]] <- ggplot(OsC@tools$BuildClusterTree
) + geom_tree(
) + theme_tree(
)
h <- sum(c("sampleName", "sampleColor") %in% names(OsC@meta.data))
if (h ==2){
dfCol <- unique(OsC@meta.data[,c("sampleName", "sampleColor")])
colVec <- as.vector(dfCol$sampleColor)
names(colVec) <- as.vector(dfCol$sampleName)
plotList[[tag]] <- plotList[[tag]] + geom_tiplab(color=colVec
)
} else {
plotList[[tag]] <- plotList[[tag]] + geom_tiplab(
)
}
plotList[[tag]] <- plotList[[tag]] + labs(title=tag
) + theme(
panel.border = element_rect(colour = "black", fill=NA, size=1),
axis.title.x=element_blank(),
plot.title = element_text(hjust = 0.5, size = 12)
) + xlim(0,dendrofactor*max(OsC@tools$BuildClusterTree[[2]]))
## Save to file ##
FNbase <- paste0(tag,".", VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
figLegend <- paste0(
'**Figure ',
figureCount,
':** ',
' Clusterplot dendrogram by sample ID. ','A pdf of this figure can be downloaded <a href="',FNrel,'" target="_blank">here</a>.'
)
NewChnk <- paste0(
"#### SampleID Dendrogram",
"\n```{r ", tag, "results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
figureCount <- figureCount + 1
}
## Done by sample ##
###############################################################################
###############################################################################
## Add cluster dendrogram by cluster ##
library(ggtree)
Idents(OsC) <- "clusterName"
OsC <- BuildClusterTree(OsC)
tag <- paste0("Cluster_Dendrogram")
OsC@tools$BuildClusterTree$tip.label <- paste0( OsC@tools$BuildClusterTree$tip.label)
plotList[[tag]] <- ggplot(OsC@tools$BuildClusterTree
) + geom_tree(
) + theme_tree(
)
h <- sum(c("clusterName", "clusterColor") %in% names(OsC@meta.data))
if (h ==2){
dfCol <- unique(OsC@meta.data[,c("clusterName", "clusterColor")])
colVec <- as.vector(dfCol$clusterColor)
names(colVec) <- as.vector(dfCol$clusterName)
plotList[[tag]] <- plotList[[tag]] + geom_tiplab(color=colVec
)
} else {
plotList[[tag]] <- plotList[[tag]] + geom_tiplab(
)
}
## Additional options
# https://guangchuangyu.github.io/ggtree-book/chapter-ggtree.html
# plotList[[tag]] <- plotList[[tag]] + geom_point2(aes(subset=node==5), color='darkgreen', size=5)
plotList[[tag]] <- plotList[[tag]] + labs(title=tag
) + theme(
panel.border = element_rect(colour = "black", fill=NA, size=1),
axis.title.x=element_blank(),
plot.title = element_text(hjust = 0.5, size = 12)
) + xlim(0, dendrofactor*max(OsC@tools$BuildClusterTree[[2]]))
#+ xlim(-1,1.2*max(OsC@tools$BuildClusterTree$edge))
## Save to file ##
FNbase <- paste0(tag,".", VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
figLegend <- paste0(
'**Figure ',
figureCount,
':** ',
' Clusterplot dendrogram. ','A pdf of this figure can be downloaded <a href="',FNrel,'" target="_blank">here</a>.'
)
NewChnk <- paste0(
"#### Cluster Dendrogram",
"\n```{r ", tag, "results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
figureCount <- figureCount + 1
## Done integraed analysis ##
###############################################################################
if (length(plotList) > 3){
tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
tabVar <- ".tabset .tabset-fade .tabset-pills"
}
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))
Figure 7: Sample-level UMAPs. Download a pdf of this figure here.
Figure 8: Clusterplot dendrogram. A pdf of this figure can be downloaded here.
library(sleepwalk)
sleepwalk(
OsC@reductions$umap@cell.embeddings,
OsC@reductions$pca@cell.embeddings,
saveToFile=paste(Obio@parameterList$outputDir,"sleepwalk.UMAP.html",sep='')
)
htmltools::includeHTML(paste(Obio@parameterList$outputDir,"sleepwalk.UMAP.html",sep=''))
## Add UMAP coordinates to Metadata ##
dfAdd <- data.frame(OsC@reductions$umap@cell.embeddings)
OsC <- addDf2seuratMetaData(
obj = OsC,
dfAdd = dfAdd
)
## Add tSNE coordinates to Metadata ##
dfAdd <- data.frame(OsC@reductions$tsne@cell.embeddings)
OsC <- addDf2seuratMetaData(
obj = OsC,
dfAdd = dfAdd
)
plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")
dfTemp <- OsC@meta.data
pos <- grep("DF_Classification", names(dfTemp))
if (length(pos) > 0){
## First make variation plot for integrated samples, than for all individual samples separately
tag <- "Doublet_plot"
dfTemp$DF_Classification <- factor(dfTemp$DF_Classification, levels = sort(unique(dfTemp$DF_Classification)))
#dotsize <- round(7500/nrow(dfTemp),1)
# dotsize <- 0.3
colVec <- c("black", "red")
names(colVec) <- c("Singlet", "Doublet")
colVec <- colVec[unique(dfTemp$DF_Classification)]
plotList[[tag]] <- ggplot(dfTemp, aes(UMAP_1, UMAP_2, color=DF_Classification)
)+ geom_point(
shape = 16,
size = as.numeric(dotsize)
) + xlab("UMAP1") + ylab("UMAP2"
) + theme_bw(
) + theme(
axis.text.y = element_text(size=8),
axis.text.x = element_text(size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12)
) + ggtitle("Potential Doublets"
) + scale_color_manual(values=colVec
)
#+ xlim(minX, maxX) + ylim(minY, maxY)
## Save to file ##
FNbase <- paste0("DoubletFinderAll", VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
## Create R markdown chunk ##
figLegend <- paste0(
'**Figure ',
figureCount,
'**: Figure depicting the location of potential doublets in PCA components 1 and 2. Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. '
)
figureCount <- figureCount + 1
NewChnk <- paste0(
"#### Doublets All Timepoints",
"\n```{r ", tag, ", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
}
if (length(plotList) > 3){
tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
tabVar <- ".tabset .tabset-fade .tabset-pills"
}
This analyis gives an indication of which cells MIGHT be doublets. Chris McGinnis DoubletFinder package is used to identify potential doublets.
## plot list will be integrated in full figure ##
if (length(pos) > 0){
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))
}
Figure 9: Figure depicting the location of potential doublets in PCA components 1 and 2. Download a pdf of this figure here.
###############################################################################
## Estimate cell cycle genes ##
exprFN <- paste0(hpc.mount, "Projects/reference_data/cell_cycle_vignette_files/nestorawa_forcellcycle_expressionMatrix.txt")
exp.mat <- read.table(file = exprFN, header = TRUE,
as.is = TRUE, row.names = 1)
# A list of cell cycle markers, from Tirosh et al, 2015, is loaded with Seurat. We can
# segregate this list into markers of G2/M phase and markers of S phase
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
print(paste0("Used as S-phase marker genes: ", sort(unique(paste(s.genes, collapse = ", ")))))
print(paste0("Used as G2M-phase marker genes: ", sort(unique(paste(g2m.genes, collapse = ", ")))))
# Create our Seurat object and complete the initalization steps
OsC <- CellCycleScoring(OsC, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
reductionVec <- c("umap", "tsne")
plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")
###############################################################################
## First UMAP all samples together ##
tag <- paste0("CellCyclePhase_All_Samples")
dfPlot <- OsC@meta.data
pos <- grep("included", names(dfPlot))
if (length(pos) == 0){
dfPlot[["included"]] <- "+"
}
dfPlot[["cellID"]] <- row.names(dfPlot)
dfPlot$UMAP_1 <- NULL
dfPlot$UMAP_2 <- NULL
## Get UMAP coordinates ##
coord <- data.frame(OsC@reductions$umap@cell.embeddings)
coord[["cellID"]] <- row.names(coord)
coord <-coord[coord$cellID %in% dfPlot$cellID, ]
dfPlot <- merge(dfPlot, coord, by.x = "cellID", by.y="cellID", all=T)
dfPlot[is.na(dfPlot)] <- 0
dfPlot <- dfPlot[dfPlot$UMAP_1 != 0 & dfPlot$UMAP_2 != 0,]
## Add cluster colors ##
#dfPlot[["Cluster"]] <- paste0("C", dfPlot$seurat_clusters)
#clusterVec <- as.vector(paste0("C", unique(sort(dfPlot$seurat_clusters))))
maxX <- 1.1*max(dfPlot$UMAP_1, na.rm = T)
minX <- 1.1*min(dfPlot$UMAP_1, na.rm = T)
maxY <- 1.1*max(dfPlot$UMAP_2, na.rm = T)
minY <- 1.1*min(dfPlot$UMAP_2, na.rm = T)
# library(scales)
# clusterCols = hue_pal()(length(clusterVec))
# dfPlot$Cluster <- factor(dfPlot$Cluster, levels = clusterVec)
# dotsize = 1
# if (nrow(dfPlot) > 10000){
# dotsize = 0.75
# } else if (nrow(dfPlot) > 20000){
# dotsize = 0.5
# } else if (nrow(dfPlot) > 50000){
# dotsize = 0.25
# }
plotList[[tag]] <- ggplot(data=dfPlot[dfPlot$included == "+",], aes(UMAP_1, UMAP_2, color=Phase)
) + geom_point( shape=16, size = as.numeric(dotsize)
) + xlab("UMAP1") + ylab("UMAP2"
) + theme_bw(
) + theme(
axis.text.y = element_text(size=8),
axis.text.x = element_text(size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12),
legend.title = element_blank()
) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
) + ggtitle(paste0("Sample: ", tag)
) + xlim(minX, maxX) + ylim(minY, maxY
) + coord_fixed(ratio=1
)
if (length(unique(dfPlot$Cluster)) > 15){
plotList[[tag]] <- plotList[[tag]] + theme(legend.position = "none")
}
FNbase <- paste0(tag, VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
figLegend <- paste0(
'**Figure ',
figureCount,
':** ',
' UMAP showing all cells from all samples together with the estimated cell-cycle phase color-coded. Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>.'
)
figureCount <- figureCount + 1
NewChnk <- paste0(
"#### ", tag,
"\n```{r CC_UMAP_",
tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
## Done first umap all samples ##
###############################################################################
###############################################################################
## First tsne all samples together ##
tag <- paste0("tSNE_All_Samples")
dfPlot <- OsC@meta.data
pos <- grep("included", names(dfPlot))
if (length(pos) == 0){
dfPlot[["included"]] <- "+"
}
dfPlot[["cellID"]] <- row.names(dfPlot)
dfPlot$tSNE_1 <- NULL
dfPlot$tSNE_2 <- NULL
## Get tsNE coordinates ##
coord <- data.frame(OsC@reductions$tsne@cell.embeddings)
coord[["cellID"]] <- row.names(coord)
coord <-coord[coord$cellID %in% dfPlot$cellID, ]
dfPlot <- merge(dfPlot, coord, by.x = "cellID", by.y="cellID", all=T)
dfPlot[is.na(dfPlot)] <- 0
dfPlot <- dfPlot[dfPlot$tSNE_1 != 0 & dfPlot$tSNE_2 != 0,]
## Add cluster colors ##
#dfPlot[["Cluster"]] <- paste0("C", dfPlot$seurat_clusters)
#clusterVec <- as.vector(paste0("C", unique(sort(dfPlot$seurat_clusters))))
maxX <- 1.1*max(dfPlot$tSNE_1, na.rm = T)
minX <- 1.1*min(dfPlot$tSNE_1, na.rm = T)
maxY <- 1.1*max(dfPlot$tSNE_2, na.rm = T)
minY <- 1.1*min(dfPlot$tSNE_2, na.rm = T)
#library(scales)
#clusterCols = hue_pal()(length(clusterVec))
#dfPlot$Cluster <- factor(dfPlot$Cluster, levels = clusterVec)
# dotsize = 1.5
# if (nrow(dfPlot) > 10000){
# dotsize = 0.75
# } else if (nrow(dfPlot) > 50000){
# dotsize = 0.5
# } else {
# dotsize = 0.25
# }
plotList[[tag]] <- ggplot(data=dfPlot[dfPlot$included == "+",], aes(tSNE_1, tSNE_2, color=Phase)
) + geom_point( shape=16, size = as.numeric(dotsize)
) + xlab("tSNE1") + ylab("tSNE2"
) + theme_bw(
) + theme(
axis.text.y = element_text(size=8),
axis.text.x = element_text(size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12),
legend.title = element_blank()
) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
) + ggtitle(paste0("Sample: ", tag)
) + xlim(minX, maxX) + ylim(minY, maxY
) + coord_fixed(ratio=1
)
if (length(unique(dfPlot$Cluster)) > 15){
plotList[[tag]] <- plotList[[tag]] + theme(legend.position = "none")
}
FNbase <- paste0(tag, VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
figLegend <- paste0(
'**Figure ',
figureCount,
':** ',
' tSNE showing all cells from all samples together. The esimated cell-cycle phase is color coded. Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>.'
)
figureCount <- figureCount + 1
NewChnk <- paste0(
"#### ", tag,
"\n```{r CC_tSNE_",
tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
## Done first tsne all samples ##
###############################################################################
###############################################################################
## Make one UMAP plot per sample ##
sampleVec <- sort(unique(OsC@meta.data$sampleID))
dfPlot <- OsC@meta.data
pos <- grep("included", names(dfPlot))
if (length(pos) == 0){
dfPlot[["included"]] <- "+"
}
dfPlot[["cellID"]] <- row.names(dfPlot)
## Get UMAP coordinates ##
coord <- data.frame(OsC@reductions$umap@cell.embeddings)
coord[["cellID"]] <- row.names(coord)
coord <-coord[coord$cellID %in% dfPlot$cellID, ]
dfPlot$UMAP_1 <- NULL
dfPlot$UMAP_2 <- NULL
dfPlot <- merge(dfPlot, coord, by.x = "cellID", by.y="cellID", all=T)
dfPlot[is.na(dfPlot)] <- 0
dfPlot <- dfPlot[dfPlot$UMAP_1 != 0 & dfPlot$UMAP_2 != 0,]
## Add cluster colors ##
#dfPlot[["Cluster"]] <- paste0("C", dfPlot$seurat_clusters)
#clusterVec <- as.vector(paste0("C", unique(sort(dfPlot$seurat_clusters))))
#library(scales)
#clusterCols = hue_pal()(length(clusterVec))
#dfPlot$Cluster <- factor(dfPlot$Cluster, levels = clusterVec)
maxX <- 1.1*max(dfPlot$UMAP_1, na.rm = T)
minX <- 1.1*min(dfPlot$UMAP_1, na.rm = T)
maxY <- 1.1*max(dfPlot$UMAP_2, na.rm = T)
minY <- 1.1*min(dfPlot$UMAP_2, na.rm = T)
for (i in 1:length(sampleVec)){
tag <- paste0("UMAP_CC_plot_by_", sampleVec[i])
dfPlotSel <- dfPlot[dfPlot$sampleID == sampleVec[i], ]
plotList[[tag]] <- ggplot(data=dfPlotSel[dfPlot$included == "+",], aes(UMAP_1, UMAP_2, color=Phase)
) + geom_point( shape=16, size = as.numeric(dotsize)
) + xlab("UMAP1") + ylab("UMAP2"
) + theme_bw(
) + theme(
axis.text.y = element_text(size=8),
axis.text.x = element_text(size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12),
legend.title = element_blank()
) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
) + ggtitle(paste0("Sample: ", tag)
) + xlim(minX, maxX) + ylim(minY, maxY
) + coord_fixed(ratio=1
)
if (length(unique(dfPlot$Cluster)) > 15){
plotList[[tag]] <- plotList[[tag]] + theme(legend.position = "none")
}
FNbase <- paste0(tag, VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
figLegend <- paste0(
'**Figure ',
figureCount,
':** ',
' Sample-level UMAPs. Estimated cell-cylce phase color-coded. Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>.'
)
figureCount <- figureCount + 1
NewChnk <- paste0(
paste("#### ", tag),
"\n```{r CC_UMAP_",
tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
}
## Done making one umap plot per sample ##
###############################################################################
###############################################################################
## Add cluster dendrogram by sample ##
if (length(unique(OsC@meta.data$sampleID)) > 3){
library(ggtree)
Idents(OsC) <- "sampleName"
OsC <- BuildClusterTree(OsC)
tag <- paste0("Sample_Dendrogram")
OsC@tools$BuildClusterTree$tip.label <- paste0( OsC@tools$BuildClusterTree$tip.label)
plotList[[tag]] <- ggplot(OsC@tools$BuildClusterTree
) + geom_tree(
) + theme_tree(
)
h <- sum(c("sampleName", "sampleColor") %in% OsC@meta.data)
if (h ==2){
dfCol <- unique(OsC@meta.data[,c("sampleName", "sampleColor")])
colVec <- as.vector(dfCol$sampleColor)
names(colVec) <- as.vector(dfCol$sampleName)
plotList[[tag]] <- plotList[[tag]] + geom_tiplab(color=colVec
)
} else {
plotList[[tag]] <- plotList[[tag]] + geom_tiplab(
)
}
plotList[[tag]] <- plotList[[tag]] + labs(title=tag
) + theme(
panel.border = element_rect(colour = "black", fill=NA, size=1),
axis.title.x=element_blank(),
plot.title = element_text(hjust = 0.5, size = 12)
) + xlim(0,dendrofactor*max(OsC@tools$BuildClusterTree[[2]]))
## Save to file ##
FNbase <- paste0(tag,".", VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
figLegend <- paste0(
'**Figure ',
figureCount,
':** ',
' Clusterplot dendrogram by sample ID. ','A pdf of this figure can be downloaded <a href="',FNrel,'", target="_blank>here</a>.'
)
NewChnk <- paste0(
"#### SampleID Dendrogram",
"\n```{r ", tag, "results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
figureCount <- figureCount + 1
}
## Done by sample ##
###############################################################################
###############################################################################
## Add cluster dendrogram by cluster ##
library(ggtree)
Idents(OsC) <- "clusterName"
OsC <- BuildClusterTree(OsC)
tag <- paste0("Cluster_Dendrogram")
OsC@tools$BuildClusterTree$tip.label <- paste0("C", OsC@tools$BuildClusterTree$tip.label)
plotList[[tag]] <- ggplot(OsC@tools$BuildClusterTree
) + geom_tree(
) + theme_tree(
)
h <- sum(c("clusterName", "clusterColor") %in% names(OsC@meta.data))
if (h ==2){
dfCol <- unique(OsC@meta.data[,c("clusterName", "clusterColor")])
colVec <- as.vector(dfCol$clusterColor)
names(colVec) <- as.vector(dfCol$clusterName)
plotList[[tag]] <- plotList[[tag]] + geom_tiplab(color=colVec
)
} else {
plotList[[tag]] <- plotList[[tag]] + geom_tiplab(
)
}
plotList[[tag]] <- plotList[[tag]] + labs(title=tag
) + theme(
panel.border = element_rect(colour = "black", fill=NA, size=1),
axis.title.x=element_blank(),
plot.title = element_text(hjust = 0.5, size = 12)
) + xlim(0,dendrofactor*max(OsC@tools$BuildClusterTree[[2]]))
#+ xlim(-1,1.2*max(OsC@tools$BuildClusterTree$edge))
## Save to file ##
FNbase <- paste0(tag,".", VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
figLegend <- paste0(
'**Figure ',
figureCount,
':** ',
' Clusterplot dendrogram. ','A pdf of this figure can be downloaded <a href="',FNrel,'" target="_blank">here</a>.'
)
NewChnk <- paste0(
"#### Cluster Dendrogram",
"\n```{r ", tag, "results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
figureCount <- figureCount + 1
## Done integraed analysis ##
###############################################################################
if (length(plotList) > 3){
tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
tabVar <- ".tabset .tabset-fade .tabset-pills"
}
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))
Figure 10: UMAP showing all cells from all samples together with the estimated cell-cycle phase color-coded. Download a pdf of this figure here.
Figure 11: tSNE showing all cells from all samples together. The esimated cell-cycle phase is color coded. Download a pdf of this figure here.
Figure 12: Sample-level UMAPs. Estimated cell-cylce phase color-coded. Download a pdf of this figure here.
Figure 13: Clusterplot dendrogram. A pdf of this figure can be downloaded here.
###############################################################################
## Create datatable for plotting ##
## This plotting procedure requires three sets: the sets cellTypeIDs, clusterIDs, cellTypeIDs
## level1ID, level2ID, level3ID
dfSample <- unique(OsC@meta.data[,c("sampleName", "sampleColor", "sampleID")])
row.names(dfSample) <- dfSample$sampleID
dfSample <- dfSample[names(Obio@sampleDetailList),]
sampleIDs <- dfSample$sampleName
colVec <- dfSample$sampleColor
clusterIDs <- unique(OsC@meta.data[,"sampleName"])
if (length(grep("cellIdent", names(OsC@meta.data))) == 0){
OsC@meta.data[["cellIdent"]] <- "All"
}
cellTypeIDs <- unique(OsC@meta.data[,"cellIdent"])
dfTemp <- OsC@meta.data
if (length(grep("^cellIdent$", names(dfTemp))) == 0){
dfTemp[["cellIdent"]] <- "All"
}
#dfTemp <- dfTemp[dfTemp$percent.mt <= max(Obio@parameterList$singleCellSeuratMtCutoff), ]
dfTemp[["cellID"]] <- row.names(dfTemp)
dfTemp <- unique(dfTemp[,c("cellID", "sampleName", "clusterName", "cellIdent")])
names(dfTemp) <- gsub("clusterName", "Cluster", names(dfTemp) )
dfTemp <- unique(dfTemp[,c("cellID", "sampleName", "Cluster","cellIdent")])
dfRes <- dfTemp
dfRes$cellID <- NULL
row.names(dfRes) <- NULL
dfRes <- unique(dfRes)
dfRes[["N_cells"]] <- 0
for (i in 1:nrow(dfRes)){
dfRes[i, "N_cells"] <- nrow(dfTemp[dfTemp$sampleName == dfRes[i,"sampleName"] & dfTemp$Cluster == dfRes[i,"Cluster"] & dfTemp$cellIdent == dfRes[i,"cellIdent"], ])
}
## Calculate cluster percentages per celltypeID ##
dfRes[["Perc_cells"]] <- 0
for (i in 1:length(cellTypeIDs)){
dfResTemp2 <- dfRes[dfRes$cellIdent == cellTypeIDs[i], ]
tempCluster <- as.vector(unique(dfResTemp2$Cluster))
for (j in 1:length(tempCluster)){
dfResTemp3 <- dfResTemp2[dfResTemp2$Cluster == tempCluster[j],]
NclusterTotal <- sum(dfResTemp3[, "N_cells"])
dfResTemp3[,"Perc_cells"] <- round(dfResTemp3[,"N_cells"]/NclusterTotal, 4)*100
if (j ==1){
dfRes3 <- dfResTemp3
} else {
dfRes3 <- rbind(dfResTemp3, dfRes3)
}
}
if (i ==1){
dfRes4 <- dfRes3
} else {
dfRes4 <- rbind(dfRes3, dfRes4)
}
}
dfRes <- dfRes4
plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")
for (i in 1:length(cellTypeIDs)){
#############################################################################
## Create cell number plot ##
tag <- paste0(cellTypeIDs[i], "_Number")
dfResTemp <- dfRes[dfRes$cellIdent == cellTypeIDs[i], ]
## Set cluster order large to small ##
library(tidyverse)
df <- dfResTemp[,c("Cluster", "N_cells")]
df <- df %>%
group_by(Cluster) %>%
summarise(N_cells = sum(N_cells)) %>% arrange(desc(N_cells))
levels <- df$Cluster
dfResTemp$Cluster <- factor(dfResTemp$Cluster, levels = levels)
## Order ##
plotList[[tag]] <- ggplot(
) + geom_bar(aes(y = N_cells, x = Cluster, fill = sampleName), data = dfResTemp, stat="identity"
) + labs(title="Cell Numbers Per Cluster", x="", y = "Cell Count"
) + theme_bw(
) + theme(
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12)
) + coord_flip() + guides(fill=guide_legend(title="Samples"))
h <- sum(c("sampleName", "sampleColor") %in% names(OsC@meta.data))
if (h ==2){
dfCol <- unique(OsC@meta.data[,c("sampleName", "sampleColor")])
dfCol <- dfCol[dfCol$sampleName %in% unique(dfResTemp$sampleName), ]
colVec <- as.vector(dfCol$sampleColor)
names(colVec) <- as.vector(dfCol$sampleName)
plotList[[tag]] <- plotList[[tag]] + scale_fill_manual("Samples" ,values = colVec
)
}
## Calculate percentages for this subset ##
###########################################################################
## Save plot to file ##
FNbase <- paste0(tag,".Ncells", VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
## ##
###########################################################################
###########################################################################
## Add to chunk ##
figCap <- paste0(
'**Figure ',
figureCount,
'A:** Cell Count in each cluster for ',
tag,
'Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. '
)
NewChnk <- paste0(
paste0("#### Barchart_ ", tag),
"\n```{r Barchart-",tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
## Done adding ##
#############################################################################
#############################################################################
## Add percentage plot ##
tag <- paste0(cellTypeIDs[i], "_Percent")
plotList[[tag]] <- ggplot(
) + geom_bar(aes(x = Cluster, y = Perc_cells, fill = sampleName), data = dfResTemp, stat="identity"
) + labs(title="Percent Cells Per Cluster", x="", y = "Percent Cells"
) + theme_bw(
) + theme(
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12)
) + coord_flip()
h <- sum(c("sampleName", "sampleColor") %in% names(OsC@meta.data))
if (h ==2){
dfCol <- unique(OsC@meta.data[,c("sampleName", "sampleColor")])
dfCol <- dfCol[dfCol$sampleName %in% unique(dfResTemp$sampleName), ]
colVec <- as.vector(dfCol$sampleColor)
names(colVec) <- as.vector(dfCol$sampleName)
plotList[[tag]] <- plotList[[tag]] + scale_fill_manual("Samples" ,values = colVec
)
}
###########################################################################
## Save plot to file ##
FNbase <- paste0(tag, ".percent.cells",VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
## ##
###########################################################################
###########################################################################
## Add to chunk ##
figCap <- paste0(
'**Figure ',
figureCount,
'B:** Cell percentages in each cluster for ',
tag,
'Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. '
)
NewChnk <- paste0(
paste0("#### Barchart_ ", tag),
"\n```{r Barchart-percent_",tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
## Done adding percentage plot ##
#############################################################################
figureCount <- figureCount + 1
#############################################################################
## Order Percent Cells by Similarity
if (!is.null(Obio@parameterList$clusterNameOrder)){
#############################################################################
## Add percentage plot ##
if (sum(unique(dfResTemp$Cluster) %in% Obio@parameterList$clusterNameOrder) == length(Obio@parameterList$clusterNameOrder)){
dfResTemp$Cluster <- factor(dfResTemp$Cluster, levels = Obio@parameterList$clusterNameOrder)
}
tag <- paste0(cellTypeIDs[i], "_Percent_clustered")
plotList[[tag]] <- ggplot(
) + geom_bar(aes(x = Cluster, y = Perc_cells, fill = sampleName), data = dfResTemp, stat="identity"
) + labs(title="Percent Cells Per Cluster Clustered", x="", y = "Percent Cells"
) + theme_bw(
) + theme(
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12)
) + coord_flip()
h <- sum(c("sampleName", "sampleColor") %in% names(OsC@meta.data))
if (h ==2){
dfCol <- unique(OsC@meta.data[,c("sampleName", "sampleColor")])
dfCol <- dfCol[dfCol$sampleName %in% unique(dfResTemp$sampleName), ]
colVec <- as.vector(dfCol$sampleColor)
names(colVec) <- as.vector(dfCol$sampleName)
plotList[[tag]] <- plotList[[tag]] + scale_fill_manual("Samples" ,values = colVec
)
}
###########################################################################
## Save plot to file ##
FNbase <- paste0(tag, ".percent.cells.clustered",VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
## ##
###########################################################################
###########################################################################
## Add to chunk ##
figCap <- paste0(
'**Figure ',
figureCount,
'C:** Cell percentages in each cluster for ',
tag,'. Clustered by cluster similarity.',
'Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. '
)
NewChnk <- paste0(
paste0("#### Barchart_ ", tag),
"\n```{r Barchart-percent_",tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
## Done adding percentage plot ##
#############################################################################
figureCount <- figureCount + 1
}
##
#############################################################################
}
## Done creating data table ##
###############################################################################
if (length(plotList) > 3){
tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
tabVar <- ".tabset .tabset-fade .tabset-pills"
}
if you could prepare a bar-shape graph with the %of cells clusters representing our populations (like in the Nat Med)
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))
Figure 14A: Cell Count in each cluster for All_NumberDownload a pdf of this figure here.
Figure 14B: Cell percentages in each cluster for All_PercentDownload a pdf of this figure here.
Figure 15C: Cell percentages in each cluster for All_Percent_clustered. Clustered by cluster similarity.Download a pdf of this figure here.
# save(Obio,
# file = paste0(
# Obio@parameterList$localWorkDir,
# Obio@parameterList$project_id,
# ".temp.bioLOGIC.Robj"
# )
# )
#print("Obio Object saved.")
# save(OsC,
# file = paste0(
# Obio@parameterList$localWorkDir,
# Obio@parameterList$project_id,
# ".Seurat.Robj"
# )
# )
library(AUCell)
plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")
# Defined in the section above #
## This needs to become a gmt file ##
if (is.null(Obio@parameterList$catRefFile)){
FNcat <- paste0(hpc.mount, "Projects/schaefera/tobias.ackels/360_scRNAseq_mm_10X_1M_neurons_20k/basedata/asl320.referenceCats.txt")
} else {
FNcat <- Obio@parameterList$catRefFile
}
if (length(grep(".gmt$", FNcat)) > 0){
print("Load gmt file. To be implemented.")
stop()
} else {
dfHeatmapGenes <- read.delim(
FNcat,
header = T,
sep = "\t",
stringsAsFactors = F
)
if (is.null(Obio@parameterList[["cat2DotplotList"]])){
Obio@parameterList[["cat2DotplotList"]] <- list()
}
for (i in 1:ncol(dfHeatmapGenes)){
genes <- unique(as.vector(dfHeatmapGenes[2:nrow(dfHeatmapGenes),i]))
genes <- genes[genes %in% rownames(x = OsC@assays$RNA)]
if (length(unique(genes)) < 61 | (length(unique(genes)) > 0)){
Obio@parameterList[["cat2DotplotList"]][[names(dfHeatmapGenes)[i]]] <- genes
}
if ((length(unique(genes)) < 501) | (length(unique(genes)) > 2) ){
Obio@parameterList[["cat2HMplotList"]] [[names(dfHeatmapGenes)[i]]] <- genes
}
}
}
## Add transcription factors to dotplot ##
if (Obio@parameterList$geneIDcolumn != "mgi_symbol" & Obio@parameterList$geneIDcolumn != "hgnc_symbol") {
queryGS <- "hgnc_symbol"
} else {
queryGS <- Obio@parameterList$geneIDcolumn
}
tempVec <- retrieve.gene.category.from.db(
cat_id = "ag_lab_categories__10",
password = db.pwd,
gene.symbol = queryGS,
user = Obio@parameterList$db.user,
host = Obio@parameterList$host
)
###############################################################################
## If this is fish, translation is non-human or non-mouse, translation is necessary
if (queryGS != Obio@parameterList$geneIDcolumn){
dfAnno <- Obio@dfGeneAnnotation
dfAnno <- unique(dfAnno[,c("hgnc_symbol",Obio@parameterList$geneIDcolumn )])
dfAnno <- dfAnno[dfAnno$hgnc_symbol != "", ]
dfAnno <- dfAnno[dfAnno$hgnc_symbol %in% tempVec, ]
tempVec <- unique(dfAnno[,Obio@parameterList$geneIDcolumn])
tempVec <- tempVec[tempVec != ""]
}
dfHMG <- dfGeneralMarkers[dfGeneralMarkers$gene %in% tempVec, ]
dfHMGsel <- data.frame(dfHMG %>% group_by(cluster) %>% top_n(5, avg_diff))
geneVec <- as.vector(unique(dfHMGsel$gene))
if (length(geneVec) > 0){
Obio@parameterList[["cat2DotplotList"]][["Top5_TF_per_cluster_Markers"]] <- geneVec
}
## Add cluster defining transcription factors to the collection ##
## For the dotplot ##
###############################################################################
## Get backdrop
exprMatrix <- as.matrix(OsC@assays$RNA@counts)
#logMat <- log10(exprMatrix+1)
# When using a Seurat object #
logMat <- data.frame(OsC[["RNA"]]@data)
## Load tSNE coordinates ##
cellsTsne <- data.frame(OsC@reductions$umap@cell.embeddings)
## done
FNbase <- paste0("CatScatter_Rankings", VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
cells_rankings <- AUCell_buildRankings(exprMatrix)
dev.off()
geneSets <- Obio@parameterList$cat2DotplotList
cells_AUC <- AUCell_calcAUC(geneSets, cells_rankings, aucMaxRank=nrow(cells_rankings)*0.05)
## Select thresholds ##
FNbase <- paste0("CatScatterHist", VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
set.seed(123)
cells_assignment <- AUCell_exploreThresholds(
cells_AUC,
plotHist=TRUE,
nCores=1,
assign=TRUE
)
dev.off()
## Add data to dfExpr ##
## Plot CatScatters ##
for (i in 1:length(Obio@parameterList$cat2DotplotList)){
HMname <- names(Obio@parameterList$cat2DotplotList)[i]
tag <- gsub("[.]", "_", HMname)
FNbase <- paste0("CatScatterHist_", HMname, VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
selectedThresholds <- cells_assignment[[i]]$aucThr$thresholds
if ("minimumDens" %in% rownames(selectedThresholds)) {
pThr <- selectedThresholds["minimumDens", "threshold"]
} else if ("Global_k1" %in% rownames(selectedThresholds)){
pThr <- selectedThresholds["Global_k1", "threshold"]
} else {
pThr <- selectedThresholds[1, "threshold"]
}
if (nrow(cellsTsne) > 15000){
cex = 0.25
} else if (nrow(cellsTsne) > 1000){
cex = 0.5
} else {
cex = 1
}
## Get AUC matrix ##
tSNE.df <- data.frame(cellsTsne, cell=rownames(cellsTsne))
mAUC <- getAUC(cells_AUC)[HMname,rownames(tSNE.df)]
dfAUC <- data.frame(mAUC)
dfAUC[["cellID"]] <- row.names(dfAUC)
dfAUC <- merge(dfAUC, tSNE.df, by.x = "cellID", by.y = "cell")
dfDocAUC <- unique(dfAUC[,c("cellID", "mAUC")])
dfDocAUC[["cat"]] <- paste0("Cat_",tag)
if (i == 1){
dfResAUC <- dfDocAUC
} else {
dfResAUC <- rbind(
dfResAUC,
dfDocAUC
)
}
input <- list(
"x_axis" = "UMAP1",
"y_axis" = "UMAP2",
"gene" = HMname
)
#dotsize <- cex
legendNote <- paste0(
" The following genes of this dataset are represented in this figure: ",
paste0(sort(Obio@parameterList$cat2DotplotList[[i]]), collapse = ", ")
)
plotList[[tag]] <- ggplot(data = dfAUC, aes(x=UMAP_1, y=UMAP_2, color = mAUC)
)+ geom_point( shape=16, size = dotsize
) + scale_color_gradient("AUC", low="grey", high="darkblue"
) + xlab(input$x_axis) + ylab(input$y_axis
) + theme_bw(
) + theme(
axis.text.y = element_text(size=8),
axis.text.x = element_text(size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12)
)+ ggtitle(paste0("Category: ", input$gene)
) + coord_fixed(ratio = 1
)
#+ theme(legend.position="none")
FNbase <- paste0("CatScatter", HMname, VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
## Create R markdown chunk ##
figLegend <- paste0(
'**Figure ',
figureCount,
'A:** Category Scatter showing gene category ',
HMname, '. ', legendNote,
'. Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. '
)
NewChnk <- paste0(
"#### Category Feature Plot ",HMname,
"\n```{r CatFeatPlot1_",
i,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
###########################################################################
## Add part B - dotplot ##
DefaultAssay(OsC) <- "RNA"
# OsC@meta.data[["hmIdent2"]] <- paste0("C", OsC@meta.data[,"seurat_clusters"])
#
# levels <- paste0(
# "C",
# sort(unique(OsC@meta.data[,"seurat_clusters"]))
# )
#
# OsC@meta.data$hmIdent2 <- factor(OsC@meta.data$hmIdent2, levels=levels)
Idents(OsC) <- "clusterName"
HMname <- paste0("Dotplot_", names(Obio@parameterList$cat2DotplotList)[i])
tag <- gsub("[.]", "_", HMname)
dpGenes <- unique(Obio@parameterList$cat2DotplotList[[i]])
legendNote <- paste0("The following genes were found in this category and the single-cell dataset: ", paste0(dpGenes, collapse=", "))
OsC_DP <- OsC
OsC_DP@meta.data$clusterName <- factor(OsC_DP@meta.data$clusterName, levels = Obio@parameterList[["clusterNameOrder"]])
dfCol <- unique(OsC@meta.data[,c("clusterName", "clusterColor")])
if(!is.null(Obio@parameterList$clusterNameOrder)){
row.names(dfCol) <- dfCol$clusterName
dfCol <- dfCol[Obio@parameterList$clusterNameOrder,]
}
colVec <- as.vector(dfCol$clusterColor)
names(colVec) <- dfCol$clusterName
textSize <- 2
if (length(colVec) > 5){
textSize <-1
} else if (length(colVec) > 20){
textSize <- 0.75
} else if (length(colVec) > 40){
textSize <- 0.5
}
plotList[[tag]] <- DotPlotSB(
object = OsC_DP,
features = dpGenes,
#cols = cols,
group.by = NULL,
split.by = NULL,
dot.scale = 4,
col.min = 0,
col.max = 5,
assay = "RNA"
) + ggtitle(gsub("_", " ", tag)
) + coord_fixed(
#) + coord_flip(
) + theme_bw() + theme(
axis.title.y = element_blank(),
axis.title.x = element_blank()
) + theme(axis.text.x = element_text(size=rel(textSize), angle = 45, hjust=1, color = colVec))
# plotList[[tag]] <- DotPlot(
# object = OsC_DP,
# features = dpGenes,
# #cols = cols,
# group.by = NULL,
# split.by = NULL,
# dot.scale = 4,
# col.min = 0,
# col.max = 5,
# assay = "RNA"
# ) + ggtitle(gsub("_", " ", tag)) + coord_fixed() + coord_flip() + theme_bw() + theme(
# axis.title.y = element_blank(),
# axis.title.x = element_blank()
# ) + theme(axis.text.x = element_text(size=rel(0.5), angle = 45, hjust=1, color = colVec))
rm(OsC_DP)
FNbase <- paste0(HMname, VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
## Create R markdown chunk ##
figLegend <- paste0(
'**Figure ',
figureCount,
'B:** Dotplot showing gene category ',
HMname, '. ', legendNote,
'. Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. '
)
figureCount <- figureCount + 1
NewChnk <- paste0(
"\n```{r ",tag,
", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
## Done adding dotplot ##
###########################################################################
}
Obio@dataTableList[["dfResAUC"]] <- dfResAUC
if (length(plotList) > 3){
tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
tabVar <- ".tabset .tabset-fade .tabset-pills"
}
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))
Figure 16A: Category Scatter showing gene category Female_Specifdic_Genes. The following genes of this dataset are represented in this figure: RPS4X, XIST. Download a pdf of this figure here.
Figure 16B: Dotplot showing gene category Dotplot_Female_Specifdic_Genes. The following genes were found in this category and the single-cell dataset: XIST, RPS4X. Download a pdf of this figure here.
Figure 17A: Category Scatter showing gene category Male_specific_Genes. The following genes of this dataset are represented in this figure: KDM5D, USP9Y, UTY. Download a pdf of this figure here.
Figure 17B: Dotplot showing gene category Dotplot_Male_specific_Genes. The following genes were found in this category and the single-cell dataset: KDM5D, USP9Y, UTY. Download a pdf of this figure here.
Figure 18A: Category Scatter showing gene category GO_EXECUTION_PHASE_OF_APOPTOSIS. The following genes of this dataset are represented in this figure: ACIN1, ACVR1C, AIFM3, AKT1, APC, BBC3, BCAP31, BIRC2, BLCAP, BMX, BNIP1, CAPN10, CASP2, CASP3, CASP6, CASP7, CASP8, CDK5RAP3, CECR2, CFLAR, CIDEB, CIDEC, CLSPN, DEDD2, DFFA, DFFB, DICER1, DNASE1L3, DNASE2, DNASE2B, DNM1L, ENDOG, ERN2, FNTA, FOXL2, H1F0, HMGB1, HMGB2, HTRA2, KPNA1, KPNB1, MADD, PRKCD, PRKCQ, PTK2, ROCK1, SATB1, SHARPIN, STK24, TAOK1, TARDBP, TOP2A, XKR8. Download a pdf of this figure here.
Figure 18B: Dotplot showing gene category Dotplot_GO_EXECUTION_PHASE_OF_APOPTOSIS. The following genes were found in this category and the single-cell dataset: ACIN1, ACVR1C, AIFM3, AKT1, APC, BBC3, BCAP31, BIRC2, BLCAP, BMX, BNIP1, CAPN10, CASP2, CASP3, CASP6, CASP7, CASP8, CDK5RAP3, CECR2, CFLAR, CIDEB, CIDEC, CLSPN, DEDD2, DFFA, DFFB, DICER1, DNASE1L3, DNASE2, DNASE2B, DNM1L, ENDOG, ERN2, FNTA, FOXL2, H1F0, HMGB1, HMGB2, HTRA2, KPNA1, KPNB1, MADD, PRKCD, PRKCQ, PTK2, ROCK1, SATB1, SHARPIN, STK24, TAOK1, TARDBP, TOP2A, XKR8. Download a pdf of this figure here.
Figure 19A: Category Scatter showing gene category Proliferation. The following genes of this dataset are represented in this figure: PCNA, TOP2A. Download a pdf of this figure here.
Figure 19B: Dotplot showing gene category Dotplot_Proliferation. The following genes were found in this category and the single-cell dataset: TOP2A, PCNA. Download a pdf of this figure here.
Figure 20A: Category Scatter showing gene category Memory_CD4_plus. The following genes of this dataset are represented in this figure: CCR7, CD4. Download a pdf of this figure here.
Figure 20B: Dotplot showing gene category Dotplot_Memory_CD4_plus. The following genes were found in this category and the single-cell dataset: CCR7, CD4. Download a pdf of this figure here.
Figure 21A: Category Scatter showing gene category CD14_plus_Mono. The following genes of this dataset are represented in this figure: CD14, LYZ. Download a pdf of this figure here.
Figure 21B: Dotplot showing gene category Dotplot_CD14_plus_Mono. The following genes were found in this category and the single-cell dataset: CD14, LYZ. Download a pdf of this figure here.
Figure 22A: Category Scatter showing gene category Naive_CD4pos_T. The following genes of this dataset are represented in this figure: CD4, IL7R, S100A4. Download a pdf of this figure here.
Figure 22B: Dotplot showing gene category Dotplot_Naive_CD4pos_T. The following genes were found in this category and the single-cell dataset: IL7R, S100A4, CD4. Download a pdf of this figure here.
Figure 23A: Category Scatter showing gene category B_cell. The following genes of this dataset are represented in this figure: MS4A1. Download a pdf of this figure here.
Figure 23B: Dotplot showing gene category Dotplot_B_cell. The following genes were found in this category and the single-cell dataset: MS4A1. Download a pdf of this figure here.
Figure 24A: Category Scatter showing gene category CD8pos_T. The following genes of this dataset are represented in this figure: CD8A. Download a pdf of this figure here.
Figure 24B: Dotplot showing gene category Dotplot_CD8pos_T. The following genes were found in this category and the single-cell dataset: CD8A. Download a pdf of this figure here.
Figure 25A: Category Scatter showing gene category FCGR3Apos_Mono. The following genes of this dataset are represented in this figure: FCGR3A, MS4A7. Download a pdf of this figure here.
Figure 25B: Dotplot showing gene category Dotplot_FCGR3Apos_Mono. The following genes were found in this category and the single-cell dataset: FCGR3A, MS4A7. Download a pdf of this figure here.
Figure 26A: Category Scatter showing gene category NK. The following genes of this dataset are represented in this figure: GNLY, NKG7. Download a pdf of this figure here.
Figure 26B: Dotplot showing gene category Dotplot_NK. The following genes were found in this category and the single-cell dataset: GNLY, NKG7. Download a pdf of this figure here.
Figure 27A: Category Scatter showing gene category DC. The following genes of this dataset are represented in this figure: CST3, FCER1A. Download a pdf of this figure here.
Figure 27B: Dotplot showing gene category Dotplot_DC. The following genes were found in this category and the single-cell dataset: FCER1A, CST3. Download a pdf of this figure here.
Figure 28A: Category Scatter showing gene category Mk. The following genes of this dataset are represented in this figure: PPBP. Download a pdf of this figure here.
Figure 28B: Dotplot showing gene category Dotplot_Mk. The following genes were found in this category and the single-cell dataset: PPBP. Download a pdf of this figure here.
Figure 29A: Category Scatter showing gene category Top5_TF_per_cluster_Markers. The following genes of this dataset are represented in this figure: BCL11A, BCL11B, CEBPD, DAZAP2, ETS1, FOS, ID2, IRF8, KLF4, LEF1, LRRFIP1, MEF2C, MYBL1, RORA, RXRA, SPI1, SPIB, TBX21, TCF4, TCF7, XBP1, ZEB2. Download a pdf of this figure here.
Figure 29B: Dotplot showing gene category Dotplot_Top5_TF_per_cluster_Markers. The following genes were found in this category and the single-cell dataset: SPI1, CEBPD, KLF4, FOS, ZEB2, TCF7, ETS1, BCL11B, LEF1, RXRA, BCL11A, TCF4, IRF8, SPIB, MEF2C, LRRFIP1, DAZAP2, RORA, MYBL1, TBX21, XBP1, ID2. Download a pdf of this figure here.
## Get average Expression for all cells
dfAvgExprAllCells <- Obio@dataTableList$dfAvglg10ExprAll
## Get average expression per cluster
dfAvgExprByCluster <- Obio@dataTableList$dfAvglg10ExprPerCluster
## Merge
dfDataTable <- merge(
dfAvgExprAllCells,
dfAvgExprByCluster,
by.x = "gene",
by.y = "gene",
all =T
)
dfDataTable[is.na(dfDataTable)] <- 0
clusterVec <- names(dfDataTable)
clusterVec <- clusterVec[!(clusterVec %in% c("gene", "all"))]
c2Vec <- clusterVec[clusterVec %in% Obio@parameterList$clusterNameOrder]
if (length(c2Vec) == length(clusterVec)){
clusterVec <- Obio@parameterList$clusterNameOrder
}
slopeVec <- as.vector(NULL, mode = "numeric")
intersectVec <- as.vector(NULL, mode = "numeric")
for (i in 1:length(clusterVec)){
LMformula <- as.formula(paste0(clusterVec[i]," ~ all"))
clusterFit <- lm(data=dfDataTable, formula = LMformula)
slopeVec <- c(
slopeVec,
coef(clusterFit)[[2]]
)
intersectVec <- c(
intersectVec,
coef(clusterFit)[[1]]
)
residuals <- round(clusterFit$residuals, 3)
dfTempResiduals <- data.frame(dfDataTable$gene, residuals, stringsAsFactors = F)
names(dfTempResiduals) <- c("gene", paste0(clusterVec[i], "_Residuals"))
if (i ==1){
dfClusterResiduals <- dfTempResiduals
} else {
dfClusterResiduals <- merge(
dfClusterResiduals,
dfTempResiduals,
by.x = "gene",
by.y = "gene",
all =T
)
}
dfClusterResiduals[is.na(dfClusterResiduals)] <- 0
}
Obio@dataTableList[["dfClusterResiduals"]] <- dfClusterResiduals
###############################################################################
## Select a suitable cut-off for marker genes ##
library(tidyverse)
dfLongResiduals <- data.frame(
dfClusterResiduals %>% pivot_longer(!gene, names_to = "cluster", values_to = "residuals")
)
dfRegMins <- data.frame(dfLongResiduals %>% group_by(cluster) %>% top_n(10, residuals) %>% summarize(min10 = min(residuals)))
## Select as cut-off: median of all so at least half the cluster have 10 marker genes
LRcutOff <- median(dfRegMins$min10)
if (LRcutOff > 1){
LRcutOff <- 1
}
LRcutOff <- round(LRcutOff, 3)
## Done ##
###############################################################################
## Make gene set with Residual marker genes ##
if (Obio@parameterList$geneIDcolumn != "hgnc_symbol" & Obio@parameterList$geneIDcolumn != "mgi_symbol"){
refGeneIDcolumn <- "hgnc_symbol"
dfAnno <- Obio@dfGeneAnnotation
dfAnno <- unique(dfAnno[,c("hgnc_symbol",Obio@parameterList$geneIDcolumn )])
dfAnno <- dfAnno[dfAnno[,Obio@parameterList$geneIDcolumn] %in% dfClusterResiduals[,"gene"],]
dfClusterResiduals <- merge(
dfClusterResiduals,
dfAnno,
by.x = "gene",
by.y = Obio@parameterList$geneIDcolumn
)
dfClusterResiduals$gene <- NULL
names(dfClusterResiduals) <- gsub("hgnc_symbol", "gene",names(dfClusterResiduals))
} else {
refGeneIDcolumn <- Obio@parameterList$geneIDcolumn
}
residualClusterMarkers <- list()
clusterVec <- names(dfClusterResiduals)
clusterVec <- clusterVec[clusterVec != "gene"]
for (i in 1:length(clusterVec)){
clusterGenes <- as.vector(sort(unique(dfClusterResiduals[dfClusterResiduals[, clusterVec[i]] > LRcutOff,"gene"])))
if (length(clusterGenes) > 1){
residualClusterMarkers[[paste0(clusterVec[[i]], "_Linear_Regression_Markers_", LRcutOff)]] <- c(
paste0(clusterVec[[i]], "_Linear_Regression_Markers_", gsub("\\.", "_", LRcutOff) , " Experiment ", Obio@parameterList$project_id),
clusterGenes
)
}
}
## Upload Marker Genes for this Project ##
#######################################################################
## Upload/update category by category ##
updatedCatIDs <- as.vector(NULL, mode = "character")
updatedCatNames <- as.vector(NULL, mode = "character")
for (i in 1:length(residualClusterMarkers)){
cat.name <- names(residualClusterMarkers)[i]
cat_type <- paste0("temp_cluster_marker_", Obio@parameterList$project_id)
cat.description.text <- as.vector(residualClusterMarkers[[i]][1])
gene.vec <- as.vector(
residualClusterMarkers[[i]]
)[2:length(residualClusterMarkers[[i]])]
gene.vec <- gene.vec[gene.vec != ""]
gene.vec <- sort(na.omit(gene.vec))
## Determine if cat exists ##
catID <- add.category.to.lab.reference.table.hs(
host = Obio@dbDetailList$host,
pwd = db.pwd,
user = Obio@dbDetailList$db.user,
cat.ref.db = Obio@dbDetailList$ref.cat.db,
cat.ref.db.table = Obio@parameterList$lab.categories.table,
gene.vector = gene.vec,
gene.id = refGeneIDcolumn, #options hgnc_symbol, mgi_symbol
mm.hs.conversion.file = "assets/annotation/homologene.data.txt",
cat_name = cat.name,
cat_type = cat_type,
data_source = paste0(Obio@parameterList$labname, " Lab"),
comments_1 = "",
comments_2 = "",
new.lab.category.table = FALSE,
cat.description.db = "internal_categories",
cat.description.db.table = "category_description",
cat.description.text = cat.description.text,
lab.name = Obio@parameterList$labname,
replaceExistingCatName = TRUE
)
updatedCatIDs <- c(
updatedCatIDs,
catID
)
updatedCatNames <- c(
updatedCatNames,
cat.name
)
} ## End dfDat loop
dfCatNameLR <- data.frame(catID = updatedCatIDs, cat_name= updatedCatNames, stringsAsFactors = F)
Obio@dataTableList[["residualClusterMarkers"]] <- residualClusterMarkers
###############################################################################
## Creaate Plot array ##
plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")
# x-axis: all
# y-axis: intensity cluster X
# highlight: Marker genes
dfClusterResiduals <- Obio@dataTableList$dfClusterResiduals
clusterVec <- names(dfDataTable)
clusterVec <- clusterVec[!(clusterVec %in% c("gene", "all"))]
c2Vec <- clusterVec[clusterVec %in% Obio@parameterList$clusterNameOrder]
if (length(c2Vec) == length(clusterVec)){
clusterVec <- Obio@parameterList$clusterNameOrder
}
h <- sum(c("seurat_clusters","clusterName", "clusterColor") %in% names(OsC@meta.data))
## Determine cluster colors ##
dfClustCol <- data.frame(cluster=clusterVec, clusterCol=clusterVec)
dfClustCol$clusterCol <- as.numeric(gsub("C_", "", dfClustCol$clusterCol))
#dfClustCol <- dfClustCol[order(dfClustCol$clusterCol, decreasing = F),]
dfClustCol <- unique(OsC@meta.data[,c("clusterName", "clusterColor", "seurat_clusters")])
dfClustCol$clusterName <- factor(dfClustCol$clusterName , levels = clusterVec)
dfClustCol <- dfClustCol[order(dfClustCol$clusterName),]
# library(scales)
# clusterCols = hue_pal()(nrow(dfClustCol))
# dfClustCol[["clusterCol"]] <- clusterCols
clusterVec <- as.vector(dfClustCol$clusterName)
clusterCol <- as.vector(dfClustCol$clusterColor)
for (i in 1:length(clusterVec)){
## Determine top10 markers ##
clustCol <- names(dfClusterResiduals)[grep(paste0(clusterVec[i],"_"), names(dfClusterResiduals))]
dfClusterResiduals <- dfClusterResiduals[order(dfClusterResiduals[,clustCol], decreasing = T), ]
markerVec <- as.vector(dfClusterResiduals[1:10,"gene"])
dfTempResiduals <- dfClusterResiduals[dfClusterResiduals[,clustCol] > LRcutOff, ]
highlightGenes <- as.vector(dfTempResiduals[,"gene"])
dfPlot <- dfDataTable[,c("gene", "all", clusterVec[i])]
dfPlot[["label"]] <- ""
dfPlot[dfPlot$gene %in% markerVec, "label"] <- dfPlot[dfPlot$gene %in% markerVec, "gene"]
## Add highlight ##
dfPlot[["ClusterMarker"]] <- ""
dfPlot[dfPlot$gene %in% highlightGenes, "ClusterMarker"] <- "+"
dfPlot[["x"]] <- dfPlot$all
dfPlot[["y"]] <- dfPlot[, clusterVec[i]]
tag <- paste0("LinearRegressionMarkers_Cluster", clusterVec[i])
## Create fitted line ##
lineFit <- lm(data=dfPlot, formula = y ~ x)
library(ggrepel)
plotList[[tag]] <- ggplot(
data = dfPlot,
aes(
x=x,
y=y, label = label, color = ClusterMarker)
#) + geom_abline(intercept = intersectVec[i], slope = slopeVec[i], linetype="dashed", color = "grey"
) + geom_smooth(method='lm', formula= y ~ x, linetype="dashed", se = T, colour = "grey"
) + geom_point( shape=16, size = dotsize
) + xlab("Average Expression All Cells") + ylab(paste0("Average Expression ", clusterVec[[i]])
) + theme_bw(
) + theme(
axis.text.y = element_text(size=8),
axis.text.x = element_text(size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12)
) + ggtitle(paste0("Cluster Markers ", clusterVec[[i]])
) + scale_color_manual(values=c("#000000", clusterCol[i])
) + geom_text_repel(
) + guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize))
)
# + xlim(0, xmax) + ylim(0, ymax)
###########################################################################
## Save plot to file ##
FNbase <- paste0("Linear.regression.cluster.markers.",clusterVec[i], VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
## ##
###########################################################################
pos <- grep(paste0(clusterVec[i],"_"), dfCatNameLR$cat_name)
LRcatID <- ""
if (length(pos) == 1){
LRcatID <- as.vector(dfCatNameLR[pos, "catID"])
}
clusterVec <- as.vector(dfClustCol$clusterName)
clusterCol <- as.vector(dfClustCol$clusterCol)
pos <- grep(paste0(gsub("C_", "Cluster_",clusterVec[i]),"$"), dfFAMplotIDs$cat_name)
DGEcatID <- ""
if (length(pos) == 1){
DGEcatID <- as.vector(dfFAMplotIDs[pos, "cat_id"])
}
if (length(Obio@sampleDetailList) == 1){
prefix <- "norm_"
} else {
prefix <- "add_"
}
if (LRcatID != ""){
heatmaplink <- paste0('In order to identify cluster specific marker genes a <a href="https://biologic.crick.ac.uk/',Obio@parameterList$project_id,'/category-view/', LRcatID,'" target="_blank">cluster-maker gene heatmap for the linear-regression derrived marker gene set</a> might be interesting.')
link <- paste0('<a href="https://',urlString,'/',Obio@parameterList$project_id,'/scatterplot?x_axis=add_counts_Avg_log10_Expr_all&y_axis=',prefix,'counts_Avg_log10_Expr_',clusterVec[i], '&cat_id=', LRcatID,'" target="_blank">here</a>')
} else {
heatmaplink <- ""
link <- ""
}
if (DGEcatID != ""){
linkDGE <- paste0('<a href="https://',urlString,'/',Obio@parameterList$project_id,'/scatterplot?x_axis=add_counts_Avg_log10_Expr_all&y_axis=',prefix,'counts_Avg_log10_Expr_',clusterVec[i], '&cat_id=', DGEcatID,'" target="_blank">here</a>')
heatmaplinkDGE <- paste0('In order to identify cluster specific marker genes a <a href="https://biologic.crick.ac.uk/',Obio@parameterList$project_id,'/category-view/', DGEcatID,'" target="_blank">cluster-maker gene heatmap for the differential-gene expression derrived marker gene set</a> might be interesting.')
} else {
linkDGE <- ""
heatmaplinkDGE <- ""
}
figCap <- paste0(
'**Figure ',
figureCount,
':** Average gene expression intensity of all cells together versus averaged expression intensities for cluster ',clusterVec[i],'. ',
'Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. ',
'An interactive version of this scatterplot can be found ', link, ' with the linear regression marker genes highlighted, and ',linkDGE,' with the differential gene expression marker genes highlighted. ',
heatmaplink,
heatmaplinkDGE
)
NewChnk <- paste0(
"#### ",tag,
"\n```{r LR_marker_",tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
## Histogram Part C done ##
###########################################################################
chnkVec <- c(
chnkVec,
NewChnk
)
}
## Done creating plot array ##
###############################################################################
if (length(plotList) > 3){
tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
tabVar <- ".tabset .tabset-fade .tabset-pills"
}
## plot list will be integrated in full figure ##
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))
Figure 30: Average gene expression intensity of all cells together versus averaged expression intensities for cluster C0. Download a pdf of this figure here. An interactive version of this scatterplot can be found here with the linear regression marker genes highlighted, and here with the differential gene expression marker genes highlighted. In order to identify cluster specific marker genes a cluster-maker gene heatmap for the linear-regression derrived marker gene set might be interesting.In order to identify cluster specific marker genes a cluster-maker gene heatmap for the differential-gene expression derrived marker gene set might be interesting.
Figure 30: Average gene expression intensity of all cells together versus averaged expression intensities for cluster C1. Download a pdf of this figure here. An interactive version of this scatterplot can be found with the linear regression marker genes highlighted, and here with the differential gene expression marker genes highlighted. In order to identify cluster specific marker genes a cluster-maker gene heatmap for the differential-gene expression derrived marker gene set might be interesting.
Figure 30: Average gene expression intensity of all cells together versus averaged expression intensities for cluster C2. Download a pdf of this figure here. An interactive version of this scatterplot can be found here with the linear regression marker genes highlighted, and here with the differential gene expression marker genes highlighted. In order to identify cluster specific marker genes a cluster-maker gene heatmap for the linear-regression derrived marker gene set might be interesting.In order to identify cluster specific marker genes a cluster-maker gene heatmap for the differential-gene expression derrived marker gene set might be interesting.
Figure 30: Average gene expression intensity of all cells together versus averaged expression intensities for cluster C3. Download a pdf of this figure here. An interactive version of this scatterplot can be found here with the linear regression marker genes highlighted, and here with the differential gene expression marker genes highlighted. In order to identify cluster specific marker genes a cluster-maker gene heatmap for the linear-regression derrived marker gene set might be interesting.In order to identify cluster specific marker genes a cluster-maker gene heatmap for the differential-gene expression derrived marker gene set might be interesting.
Figure 30: Average gene expression intensity of all cells together versus averaged expression intensities for cluster C4. Download a pdf of this figure here. An interactive version of this scatterplot can be found here with the linear regression marker genes highlighted, and here with the differential gene expression marker genes highlighted. In order to identify cluster specific marker genes a cluster-maker gene heatmap for the linear-regression derrived marker gene set might be interesting.In order to identify cluster specific marker genes a cluster-maker gene heatmap for the differential-gene expression derrived marker gene set might be interesting.
plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")
###############################################################################
## Make Heatmap ##
#OsC@meta.data[["hmIdent2"]] <- paste0("C", OsC@meta.data[,"seurat_clusters"])
Idents(OsC) <- "clusterName"
# levels <- Obio@parameterList$clusterNameOrder
#
# levels(OsC) <- levels
## Deal with more than 5000 cells ##
if (nrow(OsC@meta.data) > 5000){
set.seed(127)
n.cells <- 5000
OsC_HM <- OsC
OsC_HM@meta.data[["HM_sel"]] <- 0
selPos <- sample(x = nrow(OsC_HM@meta.data), size = n.cells, replace = FALSE, prob = NULL)
OsC_HM@meta.data[selPos, "HM_sel"] <- 1
OsC_HM <- subset(x = OsC_HM, subset = HM_sel == 1)
subsetString <- paste0("For this heatmap 5000 cells were randomly selected from ", nrow(OsC@meta.data)," cells in the experiment. ")
} else {
OsC_HM <- OsC
subsetString <- ""
}
## Scale Data ##
allGenes <- rownames(x = OsC_HM@assays$RNA)
OsC_HM <- ScaleData(OsC_HM, verbose = FALSE, features=allGenes)
Idents(OsC_HM) <- "clusterName"
Idents(OsC_HM) <- factor(Idents(OsC_HM), levels = Obio@parameterList[["clusterNameOrder"]])
## Order Heatmap Genes by cluster similarity
## For the moment: resetting heatmap list to keep it short ##
## Select heatmap genes ##
## Make general markers heatmpa ##
dfGeneralMarkers <- Obio@dataTableList$dfGeneralMarkers
dfGeneralMarkersPos <- dfGeneralMarkers[dfGeneralMarkers$avg_diff > 0,]
Nsel <- 1
Ncluster <- length(unique(OsC@meta.data$clusterName))
geneVec <- as.vector(NULL, mode="character")
while(length(geneVec) < 5*Ncluster & Nsel < nrow(dfGeneralMarkersPos)){
geneVec <- as.vector(unique(data.frame(dfGeneralMarkersPos %>% group_by(cluster) %>% top_n(Nsel, avg_diff))[,"gene"]))
Nsel <- Nsel + 1
}
if (length(geneVec) > 0){
Obio@parameterList[["cat2HMplotList"]][["Top_Cluster_Markers"]] <- geneVec
}
## Add top transcription factors for each cluster ##
## Get transcription factor genes ##
if (Obio@parameterList$geneIDcolumn != "mgi_symbol" & Obio@parameterList$geneIDcolumn != "hgnc_symbol") {
queryGS <- "hgnc_symbol"
} else {
queryGS <- Obio@parameterList$geneIDcolumn
}
tempVec <- retrieve.gene.category.from.db(
cat_id = "ag_lab_categories__10",
password = db.pwd,
gene.symbol = queryGS,
user = Obio@parameterList$db.user,
host = Obio@parameterList$host
)
###############################################################################
## If this is fish, translation is non-human or non-mouse, translation is necessary
if (queryGS != Obio@parameterList$geneIDcolumn){
dfAnno <- Obio@dfGeneAnnotation
dfAnno <- unique(dfAnno[,c("hgnc_symbol",Obio@parameterList$geneIDcolumn )])
dfAnno <- dfAnno[dfAnno$hgnc_symbol != "", ]
dfAnno <- dfAnno[dfAnno$hgnc_symbol %in% tempVec, ]
tempVec <- unique(dfAnno[,Obio@parameterList$geneIDcolumn])
tempVec <- tempVec[tempVec != ""]
}
dfHMG <- dfGeneralMarkersPos[dfGeneralMarkersPos$gene %in% tempVec, ]
dfHMGsel <- data.frame(dfHMG %>% group_by(cluster) %>% top_n(5, avg_diff))
if (length(as.vector(unique(dfHMGsel$gene))) > 0){
Obio@parameterList[["cat2HMplotList"]][["Top_TF_Cluster_Markers"]] <- as.vector(unique(dfHMGsel$gene))
}
## Done with translation
## Add linear regression heatmap for all genes ##
dfClusterResiduals <- Obio@dataTableList[["dfClusterResiduals"]]
dfLongResiduals <- data.frame(
dfClusterResiduals %>% pivot_longer(!gene, names_to = "cluster", values_to = "residuals")
)
dfTop5 <- data.frame(dfLongResiduals %>% group_by(cluster) %>% top_n(5, residuals))
dfTop5 <- dfTop5[order(dfTop5$cluster),]
geneVec <- as.vector(unique(dfTop5$gene))
if (length(geneVec) > 0){
Obio@parameterList[["cat2HMplotList"]][["Top_LR_Cluster_Markers"]] <- geneVec
}
## Add linear regression heatmap for transcription factors ##
dfLRsel <- dfLongResiduals[dfLongResiduals$gene %in% tempVec, ]
dfTop5 <- data.frame(dfLRsel %>% group_by(cluster) %>% top_n(5, residuals))
dfTop5 <- dfTop5[order(dfTop5$cluster),]
geneVec <- as.vector(unique(dfTop5$gene))
if (length(geneVec) > 0){
Obio@parameterList[["cat2HMplotList"]][["Top_LR_TF_Cluster_Markers"]] <- geneVec
}
## Invert listing order##
orderVec <- rev(names(Obio@parameterList$cat2HMplotList))
topVec <- c(
"Top_Cluster_Markers",
"Top_TF_Cluster_Markers",
"Top_LR_Cluster_Markers",
"Top_LR_TF_Cluster_Markers"
)
topVec <- topVec[topVec %in% orderVec]
orderVec <- orderVec[!(orderVec %in% topVec)]
orderVec <- c(
topVec,
orderVec
)
Obio@parameterList$cat2HMplotList <- Obio@parameterList$cat2HMplotList[orderVec]
for (i in 1:length(Obio@parameterList[["cat2HMplotList"]])){
tag <- paste0("HM_", names(Obio@parameterList$cat2HMplotList)[i])
textSize <- 5
HMname <- names(Obio@parameterList[["cat2HMplotList"]])[i]
colors <- unique(OsC_HM@meta.data$clusterColor)
plotList[[tag]] <- DoHeatmap(
object = OsC_HM,
features = Obio@parameterList[["cat2HMplotList"]][[i]],
#group.by = "hmIdent",
draw.lines =T,
label = T,
group.bar = TRUE,
slot = "scale.data",
lines.width = 2, #With of separation lines in 'cells',
size = 2,
hjust = 0,
angle = 90,
group.colors = colors,
#slim.col.label = TRUE,
#remove.key = removeKey
# ) + theme(legend.position = "none"
) + theme(
text = element_text(size=textSize),
legend.position="bottom" #,
#axis.text.x = element_text(angle = 90, hjust=1)
) + scale_fill_gradientn(colors = c("blue", "white", "red")
)
## Save to file ##
FNbase <- paste0(tag, VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
###################
## Add link to interactive heatmap for this project
#for (i in 1:length(residualClusterMarkers)){
## Create default ##
link <- ""
if (Obio@projectDetailList$species %in% c("mus_musculus", "homo_sapiens")){
cat.name <- tag
cat_type <- paste0("temp_", Obio@parameterList$project_id)
cat.description.text <- tag
gene.vec <- Obio@parameterList[["cat2HMplotList"]][[i]]
gene.vec <- gene.vec[gene.vec != ""]
gene.vec <- sort(na.omit(gene.vec))
## Determine if cat exists ##
catID <- add.category.to.lab.reference.table.hs(
host = Obio@dbDetailList$host,
pwd = db.pwd,
user = Obio@dbDetailList$db.user,
cat.ref.db = Obio@dbDetailList$ref.cat.db,
cat.ref.db.table = Obio@parameterList$lab.categories.table,
gene.vector = gene.vec,
gene.id = Obio@parameterList$geneIDcolumn, #options hgnc_symbol, mgi_symbol
mm.hs.conversion.file = "assets/annotation/homologene.data.txt",
cat_name = cat.name,
cat_type = cat_type,
data_source = paste0(Obio@parameterList$labname, " Lab"),
comments_1 = "",
comments_2 = "",
new.lab.category.table = FALSE,
cat.description.db = "internal_categories",
cat.description.db.table = "category_description",
cat.description.text = cat.description.text,
lab.name = Obio@parameterList$labname,
replaceExistingCatName = TRUE
)
link <- paste0('Link to an interactive version of <a href="https://',urlString,'/',Obio@parameterList$project_id,'/category-view/',catID,'" target="_blank">this heatmap</a>. ')
}
#} ## End dfDat loop
## Done adding link
##############################
## Create R markdown chunk ##
figLegend <- paste0(
'**Figure ',
figureCount,
':** ',HMname ,' Heatmap showing the most distinct marker genes in each cluster. ' , subsetString,
'Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. ',
link
)
figureCount <- figureCount + 1
NewChnk <- paste0(
"#### Heatmap ", HMname,
"\n```{r Heatmap_", tag,
", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
}
rm(OsC_HM)
## Done making Heatmap ##
###############################################################################
###############################################################################
## Make dotplot
Idents(OsC) <- "clusterName"
# levels <- paste0(
# "C",
# sort(unique(OsC@meta.data[,"seurat_clusters"]))
# )
#
# levels(OsC) <- levels
dpGenes <- as.vector(unique(dfTop5$gene))
if (length(dpGenes) >= 50){
dpGenes <- as.vector(unique(dfTop1$gene))
}
#dpGenes <- rev(dpGenes[!(duplicated(dpGenes))])
tag <- paste0("Dotplot_", "Var_Genes")
textSize <- 5
dfCol <- unique(OsC@meta.data[,c("clusterName", "clusterColor")])
if(!is.null(Obio@parameterList$clusterNameOrder)){
row.names(dfCol) <- dfCol$clusterName
dfCol <- dfCol[Obio@parameterList$clusterNameOrder,]
}
colVec <- as.vector(dfCol$clusterColor)
names(colVec) <- dfCol$clusterName
textSize <- 2
if (length(colVec) > 5){
textSize <-1
} else if (length(colVec) > 20){
textSize <- 0.75
} else if (length(colVec) > 40){
textSize <- 0.5
}
Idents(OsC) <- "clusterName"
Idents(OsC) <- factor(Idents(OsC), levels = Obio@parameterList[["clusterNameOrder"]])
#OsC@meta.data$clusterName <- factor(OsC@meta.data$clusterName, levels = #Obio@parameterList[["clusterNameOrder"]])
plotList[[tag]] <- DotPlotSB(
object = OsC,
features = dpGenes,
assay = "RNA",
#cols = cols,
group.by = NULL,
split.by = NULL,
dot.scale = 4,
col.min = 0,
col.max = 5
) + ggtitle(gsub("_", "", tag)
) + xlab(""
) + coord_fixed(
# ) + coord_flip(
) + theme_bw() +
theme(axis.text.x = element_text(size=rel(textSize), angle = 45, hjust=1, color = colVec))
# plotList[[tag]] <- DotPlot(
# object = OsC,
# features = dpGenes,
# assay = "RNA",
# #cols = cols,
# group.by = NULL,
# split.by = NULL,
# dot.scale = 4,
# col.min = 0,
# col.max = 5
# ) + ggtitle(gsub("_", "", tag)
# ) + xlab(""
# ) + coord_fixed(
# #) + coord_flip(
# ) + theme_bw() +
# theme(axis.text.x = element_text(size=rel(0.5), angle = 45, hjust=1, color = colVec))
#
## Save to file ##
FNbase <- paste0(tag, VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
## Create R markdown chunk ##
figLegend <- paste0(
'**Figure ',
figureCount,
':** Dotplot showing showing selected marker genes. ',
'Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. '
)
figureCount <- figureCount + 1
NewChnk <- paste0(
"#### Dotplot Markers",
"\n```{r Dotplot_var_",
", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
## Done making dotplot ##
###############################################################################
############################
## Make cat feature plot
## done making cat feature plot
################################
if (length(plotList) > 3){
tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
tabVar <- ".tabset .tabset-fade .tabset-pills"
}
In this section heatmaps and dotplots for various gene categories are provided.
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))
Figure 30: Top_Cluster_Markers Heatmap showing the most distinct marker genes in each cluster. Download a pdf of this figure here. Link to an interactive version of this heatmap.
Figure 31: Top_TF_Cluster_Markers Heatmap showing the most distinct marker genes in each cluster. Download a pdf of this figure here. Link to an interactive version of this heatmap.
Figure 32: Top_LR_Cluster_Markers Heatmap showing the most distinct marker genes in each cluster. Download a pdf of this figure here. Link to an interactive version of this heatmap.
Figure 33: Top_LR_TF_Cluster_Markers Heatmap showing the most distinct marker genes in each cluster. Download a pdf of this figure here. Link to an interactive version of this heatmap.
Figure 34: Mk Heatmap showing the most distinct marker genes in each cluster. Download a pdf of this figure here. Link to an interactive version of this heatmap.
Figure 35: DC Heatmap showing the most distinct marker genes in each cluster. Download a pdf of this figure here. Link to an interactive version of this heatmap.
Figure 36: NK Heatmap showing the most distinct marker genes in each cluster. Download a pdf of this figure here. Link to an interactive version of this heatmap.
Figure 37: FCGR3Apos_Mono Heatmap showing the most distinct marker genes in each cluster. Download a pdf of this figure here. Link to an interactive version of this heatmap.
Figure 38: CD8pos_T Heatmap showing the most distinct marker genes in each cluster. Download a pdf of this figure here. Link to an interactive version of this heatmap.
Figure 39: B_cell Heatmap showing the most distinct marker genes in each cluster. Download a pdf of this figure here. Link to an interactive version of this heatmap.
Figure 40: Naive_CD4pos_T Heatmap showing the most distinct marker genes in each cluster. Download a pdf of this figure here. Link to an interactive version of this heatmap.
Figure 41: CD14_plus_Mono Heatmap showing the most distinct marker genes in each cluster. Download a pdf of this figure here. Link to an interactive version of this heatmap.
Figure 42: Memory_CD4_plus Heatmap showing the most distinct marker genes in each cluster. Download a pdf of this figure here. Link to an interactive version of this heatmap.
Figure 43: Proliferation Heatmap showing the most distinct marker genes in each cluster. Download a pdf of this figure here. Link to an interactive version of this heatmap.
Figure 44: GO_EXECUTION_PHASE_OF_APOPTOSIS Heatmap showing the most distinct marker genes in each cluster. Download a pdf of this figure here. Link to an interactive version of this heatmap.
Figure 45: Male_specific_Genes Heatmap showing the most distinct marker genes in each cluster. Download a pdf of this figure here. Link to an interactive version of this heatmap.
Figure 46: Female_Specifdic_Genes Heatmap showing the most distinct marker genes in each cluster. Download a pdf of this figure here. Link to an interactive version of this heatmap.
Figure 47: Dotplot showing showing selected marker genes. Download a pdf of this figure here.
library(DT)
###############################################################################
## Calculate percentages of expressed genes ##
DefaultAssay(OsC) <- "RNA"
my_genes <- rownames(x = OsC@assays$RNA)
exp <- FetchData(OsC, my_genes)
ExprMatrix <- round(as.matrix(colMeans(exp > 0)) *100,1)
colnames(ExprMatrix)[1] <- "count_cut_off"
dfExprMatrix <- data.frame(ExprMatrix)
dfExprMatrix[["gene"]] <- row.names(dfExprMatrix)
Obio@dataTableList[["dfPercCellsExpr"]] <- dfExprMatrix
## Done calculating percentages of expressed gens ##
###############################################################################
###############################################################################
## Create one table per cluster ##
## Add expressed in N percent cells ##
dfPercCellsExpr <- Obio@dataTableList$dfPercCellsExpr
dfDat <- Obio@dataTableList$dfGeneralMarkersFilt
dfDat$avg_diff <- round(dfDat$avg_diff,2)
dfDat <- dfDat[,c("cluster", "gene", "avg_diff", "power")]
names(dfDat) <- gsub("avg_diff", "DGE_avg_diff", names(dfDat))
names(dfDat) <- gsub("power", "DGE_power", names(dfDat))
dfDat[["join_col"]] <- paste0(dfDat$gene, "_",dfDat$cluster)
dfDat$cluster <- NULL
dfDat$gene <- NULL
## Add residual results ##
dfClusterResiduals <- Obio@dataTableList$dfClusterResiduals
library(tidyverse)
dfLongResiduals <- data.frame(
dfClusterResiduals %>% pivot_longer(!gene, names_to = "cluster", values_to = "residuals")
)
###############################################################################
## Calculate Coefficient of variation for no-cluster X genes to find best unique cluster markers
## Done ##
###############################################################################
dfRegMins <- data.frame(dfLongResiduals %>% group_by(cluster) %>% top_n(10, residuals) %>% summarize(min10 = min(residuals)))
## Select as cut-off: median of all so at least half the cluster have 10 marker genes
cutOff <- median(dfRegMins$min10)
if (cutOff > 1){
cutOff <- 1
}
dfLongResiduals <- dfLongResiduals[dfLongResiduals$residuals > cutOff, ]
dfLongResiduals$cluster <- gsub("_Residuals", "", dfLongResiduals$cluster)
dfLongResiduals[["join_col"]] <- paste0(
dfLongResiduals$gene,
"_",
dfLongResiduals$cluster
)
dfLongResiduals$gene <- NULL
dfLongResiduals$cluster <- NULL
dfDat <- merge(
dfDat,
dfLongResiduals,
by.x = "join_col",
by.y = "join_col",
all =TRUE
)
dfDat[is.na(dfDat)] <- 0
names(dfDat) <- gsub("residuals", "LinearReg_Residuals", names(dfDat))
dfDat <- dfDat[order(dfDat$LinearReg_Residuals, decreasing = T),]
dfDat[["gene"]] <- sapply(
dfDat$join_col, function(x) unlist(strsplit(x, "_"))[1]
)
dfDat[["Cluster"]] <- sapply(
dfDat$join_col, function(x) unlist(strsplit(x, "_"))[2]
)
dfDat$join_col <- NULL
dfDat$gene <- substr(dfDat$gene,1,50)
dfDat[["uniqueMarker"]] <- as.character(!duplicated(dfDat$gene))
dfDat$uniqueMarker <- substr(dfDat$uniqueMarker, 1,1)
#######################
## Add cluster name
h <- sum(c("clusterName", "clusterColor") %in% names(OsC@meta.data))
if (sum(h) == 2){
dfClust <- unique(OsC@meta.data[,c("seurat_clusters", "clusterName", "clusterColor")])
dfDat <- merge(
dfDat,
dfClust,
by.x = "Cluster",
by.y = "clusterName"
)
}
## Done cluster name
########################
dfDat$Cluster <- paste0("Cluster_", dfDat$Cluster, "_C")
dtList <- list()
tabClusters <- sort(unique(dfDat$clusterName))
chnkVec <- as.vector(NULL, mode="character")
linkGeneView <- paste0('<a href="https://',urlString,'/',Obio@parameterList$project_id,'/gene-view" target = "_blank">GeneView</a>')
linkFeatureView <- paste0('<a href="https://',urlString,'/mdata/',Obio@parameterList$project_id,'/html/FeatureView.html" target="_blank">FeatureView</a>')
## Save workbook ##
baseFN <- paste0(
Obio@parameterList$project_id,
".Cluster.marker.table.xlsx"
)
FNtabrel <- paste0("report_figures/", baseFN)
tablink <- paste0('An Excel table containing the cluster marker genes can be downloaded <a href = "',FNtabrel,'">here</a>. ')
#for (i in 1:length(tabClusters)){
#tabLegend = paste0("**Table: ** Positive and negative marker genes for ", tabClusters[i])
tabLegend = paste0("**Table: ** Positive and negative cluster-defining marker genes. Perc_Cells_Expr: Percentage of total cells expressing gene X. Enr in Cluster: Enrichment of gene X in cluster Y. To collapse the table to one particular cluster, type the name of the cluster in the search box.",
"Use the ",linkGeneView," or ",linkFeatureView," functionalities to examine individual genes in more detail. ",tablink
)
#dfTempDat <- dfDat[dfDat$cluster == tabClusters[i],]
dfTempDat <- dfDat
## Percent expressed genes
dfTempDat <- merge(
dfTempDat,
Obio@dataTableList$dfPercCellsExpr,
by.x = "gene",
by.y = "gene"
)
names(dfTempDat) <- gsub("count_cut_off", "Perc_Cells_Expr",names(dfTempDat))
names(dfTempDat) <- gsub("myAUC", "AUC", names(dfTempDat))
names(dfTempDat) <- gsub("[.]", "", names(dfTempDat))
#dtList[[paste0("Table",i)]] <- datatable(dfDat,rownames = FALSE)
if (Obio@parameterList$host == "10.27.241.234"){
urlString <- "biologic.thecrick.org"
} else {
urlString <- "biologic.crick.ac.uk"
}
dfTempDat[["ClusterName"]] <- paste0(
'<p style="background-color:',dfTempDat$clusterColor,';text-align:center">',dfTempDat$Cluster,'</p>'
)
orderVec <- c('gene','ClusterName','DGE_avg_diff','DGE_power','LinearReg_Residuals','uniqueMarker','Cluster','Perc_Cells_Expr')
orderVec <- orderVec[orderVec %in% names(dfTempDat)]
dfTempDat <- dfTempDat[,orderVec]
dfOutput <- dfTempDat
dfTempDat$gene <- paste0('<a href="https://',urlString,'/',Obio@parameterList$project_id,'/gene-view?query=',dfTempDat$gene,'&exact=TRUE" target="_blank">', dfTempDat$gene, '</a>')
NewChnk <- paste0(
"#### ", names(dtList),
"\n```{r datatable_",
i,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
tabLegend,"'}\n",
"\n",
"\n datatable(dfTempDat,rownames = FALSE, escape = FALSE)",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
#}
##############################################################################
## Create marker gene table Excel ##
###############################################################################
## Assemble DGE summary table ##
library(openxlsx)
wb <- createWorkbook()
hs1 <- createStyle(
fontColour = "#ffffff",
fgFill = "#000000",
halign = "CENTER",
textDecoration = "Bold"
)
sheetName <- "Cluster_Marker_Genes"
addWorksheet(
wb,
sheetName = sheetName
)
freezePane(wb, sheetName , firstActiveRow = 2)
writeData(wb, 1, dfOutput, startRow = 1, startCol = 1, headerStyle = hs1)
## Done adding to Excel workbook ##
## Save workbook ##
baseFN <- paste0(
Obio@parameterList$project_id,
".Cluster.marker.table.xlsx"
)
FNtabrel <- paste0("report_figures/", baseFN)
outPutFN <- paste0(
Obio@parameterList$reportTableDir,
baseFN
)
saveWorkbook(
wb,
outPutFN ,
overwrite = TRUE
)
## Done creating marker gene table ##
##############################################################################
## Done creating one table per cluster ##
##############################################################################
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))
Table: Positive and negative cluster-defining marker genes. Perc_Cells_Expr: Percentage of total cells expressing gene X. Enr in Cluster: Enrichment of gene X in cluster Y. To collapse the table to one particular cluster, type the name of the cluster in the search box.Use the GeneView or FeatureView functionalities to examine individual genes in more detail. An Excel table containing the cluster marker genes can be downloaded here.
###########################
## Create gmt list
## Retrieve gmt files from database
## Add custom gmt files
library(clusterProfiler)
library(ggplot2)
gmtList <- list()
pos <- grep("clusterSigEnrichmentList", slotNames(Obio))
if (length(pos) > 0){
if (is.null(Obio@clusterSigEnrichmentList)){
dbtableList <- list(
"Cell Type Signatures" = "mysigdb_sc_sig",
"Cell Type Signatures" = "cibersort_L22",
"GO-MF" = "mysigdb_c5_MF",
"Pathways" = "mysigdb_c2_1329_canonical_pathways",
"Allen_Brain_Atlas" = "Allen_Brain_Atlas"
)
} else {
dbtableList <- Obio@clusterSigEnrichmentList
}
} else {
dbtableList <- list(
"Cell Type Signatures" = "mysigdb_sc_sig",
"Cell Type Signatures" = "cibersort_L22",
"GO-MF" = "mysigdb_c5_MF",
"Pathways" = "mysigdb_c2_1329_canonical_pathways",
"Allen_Brain_Atlas" = "Allen_Brain_Atlas"
)
}
for (i in 1:length(dbtableList)){
dfTemp <- unique(import.db.table.from.db(
host = Obio@dbDetailList$host,
dbname = Obio@dbDetailList$ref.cat.db,
dbtable = dbtableList [[i]],
password = db.pwd,
user = Obio@dbDetailList$db.user
))
rmVec <- grep("temp_", dfTemp$cat_type)
if (length(rmVec) > 0){
dfTemp <- dfTemp[-rmVec, ]
}
dfTemp <- unique(dbcat2gmt(
dfTemp, # As downloaded from reference_categories_db_new database
gene.id.column = queryGS
))
dfTemp <- dfTemp[!duplicated(dfTemp[,1]), ]
write.table(
dfTemp,
"temp.gmt.txt",
row.names = F,
sep = "\t",
col.names = F,
quote = F
)
CPgmt <- read.gmt("temp.gmt.txt")
unlink("temp.gmt.txt")
CPgmt <- unique(CPgmt[CPgmt$gene != "", ])
gmtList[[dbtableList[[i]]]] <- CPgmt
}
## Edit collection names for plot
names(gmtList) <- gsub("mysigdb_", "",names(gmtList))
names(gmtList) <- gsub("c2_1329_canonical_p", "P",names(gmtList))
names(gmtList) <- gsub("sc_sig", "CellSig",names(gmtList))
names(gmtList) <- gsub("cibersort_L22", "CellSig",names(gmtList))
names(gmtList) <- gsub("c5_", "GO_",names(gmtList))
## Done creating gmt list
###########################
resList <- profileCluster(
# Input gmt file with categories to test: dfGmt
# Output: table with enrichments
obj = Obio,
markerList = Obio@dataTableList[["residualClusterMarkers"]],
gmtList = gmtList,
nTop = 15,
pvalueCutoff= 0.5
)
plotList <- resList$plotList
chnkVec <- resList$chnkVec
if (length(plotList) > 3){
tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
tabVar <- ".tabset .tabset-fade .tabset-pills"
}
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))
Figure 48: Enrichment analysis of cluster gene signatures to infer the cluster cell type. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > Cell Signatures and find these categories using the search box.
Figure 49: Enrichment analysis of cluster gene signatures to infer the cluster cell type. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > Cell Signatures and find these categories using the search box.
Figure 50: Enrichment analysis of cluster gene signatures to infer the cluster cell type. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > Cell Signatures and find these categories using the search box.
Figure 51: Enrichment analysis of cluster gene signatures to infer the cluster cell type. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > Cell Signatures and find these categories using the search box.
## Two options: full heatmap and averaged heatmaps
## https://satijalab.org/seurat/v3.0/interaction_vignette.html
###############################################################################
## Add percentage expressed genes ##
# DefaultAssay(OsC) <- "RNA"
# my_genes <- rownames(x = OsC@assays$RNA)
#
# exp <- FetchData(OsC, my_genes)
#
# ExprMatrix <- round(as.matrix(colMeans(exp > 0)) *100,1)
# colnames(ExprMatrix)[1] <- "count_cut_off"
# dfExprMatrix <- data.frame(ExprMatrix)
# dfExprMatrix[["gene"]] <- row.names(dfExprMatrix)
#
# Obio@dataTableList[["dfPercCellsExpr"]] <- dfExprMatrix
#
# hmRelevantGenes <- as.vector(unique(dfExprMatrix[dfExprMatrix$count_cut_off > Obio@parameterList$singleCellPercExpressedMinCutOff, "gene"]))
#
#
#
# ## Done adding percentage expressed ##
# ###############################################################################
#
#
# ###############################################################################
# ## Make plot according to reference categories ##
# allGenes <- rownames(x = OsC@assays$RNA)
# OsC <- ScaleData(OsC, verbose = FALSE, features=allGenes)
#
# DefaultAssay(OsC) <- "RNA"
#
#
# ## Add heatmap identities to meta.data ##
#
OsC@meta.data[["hmIdent"]] <- paste0(
OsC@meta.data[,"seurat_clusters"],
"_",
substr(OsC@meta.data$sampleID,1,10)
)
#
if (length(unique(OsC@meta.data$hmIdent)) > 25){
OsC@meta.data[["hmIdent"]] <- OsC@meta.data[,"seurat_clusters"]
}
#
# Idents(OsC) <- "hmIdent"
#
# ## Done adding heatmap identities to meta.data ##
#
# printPdf <- TRUE
# referenceList <- Obio@dataTableList$referenceList
# for (i in 1:length(referenceList)){
# HMname <- names(referenceList)[i]
# geneVec <- unique(referenceList[[i]][referenceList[[i]] %in% rownames(x = OsC@assays$RNA)])
#
# if (length(geneVec) > 50){
# geneVec <- geneVec[geneVec %in% hmRelevantGenes]
# }
#
#
# ## Do Heatmap ##
# if (length(geneVec) < 1500 & length(geneVec) > 2){
# Idents(OsC) <- "hmIdent"
# HMname <- names(referenceList)[i]
# cat("\n")
# cat(paste0("**Heatmap ", HMname,"**"))
# cat("\n")
# cat("\n")
# ## Cluster genes ##
# HMgenes <- referenceList[[i]]
# #dfCluster <- OsC@assays$integrated
# Mexpr <- GetAssayData(object = OsC, assay.type = "integrated", slot = "scale.data")
# HMgenesSel <- HMgenes[HMgenes %in% row.names(Mexpr)]
#
# if (length(HMgenesSel) > 2){
# Mexpr <- Mexpr[HMgenesSel,]
#
# pdf(paste0("temp", VersionPdfExt))
# hmRes <- make.hm(
# m.df1 = Mexpr,
# filename = "",
# k.number = 1,
# n.colors = 1000,
# hclust.method = "complete",
# dist.method = "euclidean",
# main = "",
# Colv = TRUE,
# showRowNames = TRUE,
# showColNames = F,
# plotSeparationLines = FALSE
# )
# dev.off()
#
# orderedGenes <- as.vector(unique(row.names(hmRes$sorted)))
#
# if (length(unique(OsC@meta.data$hmIdent)) > 10){
# removeKey <- TRUE
# } else {
# removeKey <- FALSE
# }
#
# if (length(orderedGenes) <= 50){
# label = TRUE
# } else {
# label = FALSE
# }
#
# p1 <- DoHeatmap(
# object = OsC,
# features = orderedGenes,
# #group.by = "hmIdent",
# draw.lines =T,
# label = label,
# group.bar = TRUE,
# slot = "scale.data",
# lines.width = 2 #With of separation lines in 'cells'
# #slim.col.label = TRUE,
# #remove.key = removeKey
# ) + theme(legend.position = "none")
#
# print(p1)
#
# ## Save to file ##
# FNbase <- paste0("HM", HMname, VersionPdfExt)
# FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
# FNrel <- paste0("report_figures/", FNbase)
#
# pdf(FN)
# print(p1)
# dev.off()
#
# cat("\n")
# cat(paste0('Download a pdf of this figure [here](', FNrel, '). '))
# cat("\n")
# cat("\n")
#
# }
# }
# }
## Done making plots according to gene categories ##
###############################################################################
DefaultAssay(OsC) <- "RNA"
cat(paste0(
'Feature plots for any gene in this experiment can be viewed <a href="https://biologic.crick.ac.uk/mdata/',Obio@parameterList$project_id,'/html/FeatureView.html" target="_blank">here</a>.'
))
Feature plots for any gene in this experiment can be viewed here.
plotGenes <- c(
Obio@dataTableList$referenceList$integrated_top30var[1:10]
)
plotParts <- ceiling(length(plotGenes)/2)
chnkVec <- as.vector(NULL, mode = "character")
plotListF <- list()
for (i in 1:plotParts){
tag1 <- paste0("Featureplot_",i)
featureGenes <- c(plotGenes[((2*i)-1)], plotGenes[((2*i))])
plotListF[[tag1]] <- FeaturePlot(
OsC,
features = featureGenes,
#split.by = "orig.ident",
reduction = Obio@parameterList$primReduction
)
## Save to file ##
FNbase <- paste0("Featureplot.", plotGenes[((2*i)-1)], ".", plotGenes[((2*i))],".", VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotListF[[tag1]])
dev.off()
linkFeatureView <- paste0('<a href="https://',urlString,'/mdata/',Obio@parameterList$project_id,'/html/FeatureView.html" target="_blank">FeatureView</a>')
figLegend <- paste0(
"**Figure ",
figureCount,
":** Gene expression plot for genes ",
plotGenes[((2*i)-1)],
" and ",
plotGenes[((2*i))],".",
" Results for any other gene may be plotted in ",linkFeatureView,"."
)
NewChnk <- paste0(
"#### Featureplot ", plotGenes[((2*i)-1)], " and ",plotGenes[((2*i))],
"\n```{r FeaturePlot_", i,
", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotListF[['",tag1,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
}
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))
Figure 48: Gene expression plot for genes IGLC2 and GNLY. Results for any other gene may be plotted in FeatureView.
Figure 48: Gene expression plot for genes FCGR3A and CDKN1C. Results for any other gene may be plotted in FeatureView.
Figure 48: Gene expression plot for genes GZMB and CXCL10. Results for any other gene may be plotted in FeatureView.
Figure 48: Gene expression plot for genes IGHM and NKG7. Results for any other gene may be plotted in FeatureView.
Figure 48: Gene expression plot for genes S100A9 and IGLC3. Results for any other gene may be plotted in FeatureView.
figureCount <- figureCount + 1
# An alternative heuristic method generates an 'Elbow plot': a ranking of principle components based on the percentage of variance explained by each one (`ElbowPlot` function). In this example, we can observe an 'elbow' around PC9-10, suggesting that the majority of true signal is captured in the first 10 PCs.
ElbowPlot(object = OsC) + theme(
axis.text.y = element_text(size=8),
axis.text.x = element_text(size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12)
) + ggtitle(paste0("Variance per PCA Dimension"))
Figure, 49: Elbowplot Variance explained per PCA dimension
## Plot variance per PCA dimension ##
## Add PCA coordinates ##
dfTemp <- data.frame(OsC@reductions$pca@cell.embeddings)[, 1:20]
OsC <- addDf2seuratMetaData(
obj = OsC,
dfAdd = dfTemp
)
## Add UMAP coordinates to Metadata ##
dfAdd <- data.frame(OsC@reductions$umap@cell.embeddings)
OsC <- addDf2seuratMetaData(
obj = OsC,
dfAdd = dfAdd
)
## Add tSNE coordinates to Metadata ##
dfAdd <- data.frame(OsC@reductions$tsne@cell.embeddings)
OsC <- addDf2seuratMetaData(
obj = OsC,
dfAdd = dfAdd
)
Obio@dataTableList[["meta.data"]] <- OsC@meta.data
xVec <- c("PC_1","PC_3","PC_5","PC_7","PC_9","PC_11","PC_13","PC_15","PC_17","PC_19")
yVec <- c("PC_2","PC_4","PC_6","PC_8","PC_10","PC_12","PC_14","PC_16","PC_18","PC_20")
pcVec <- c("PC_1","PC_2","PC_3","PC_4","PC_5","PC_6","PC_7","PC_8","PC_9","PC_10")
chnkVec <- as.vector(NULL, mode = "character")
plotListCell <- list()
plotListGene <- list()
dfDat <- Obio@dataTableList$meta.data
xMax <- ceiling(max(dfDat[,"PC_1"]))
xMin <- floor(min(dfDat[,"PC_1"]))
aLimit <- max(abs(c(xMax, xMin)))
###############################################################################
## Collect top-enriched genes ##
EnrichedGenesList <- list()
## Done ##
###############################################################################
for (i in 1:length(xVec)){
dfDat <- Obio@dataTableList$meta.data
dfSel <- dfDat
selXY <- c(xVec[i], yVec[i])
colCol <- "clusterName"
tag <- paste0(xVec[i], "and", yVec[i])
tag <- gsub("_", "", tag)
## Make Cell level PCA
plotListCell[[tag]] <- ggplot(data=dfDat, aes_string(selXY[1] , selXY[2], col=colCol, shape="sampleName")
) + geom_vline(xintercept = 0, color = "grey", size=0.5
) + geom_hline(yintercept = 0, color = "grey", size=0.5
) + geom_point()+ ggtitle(paste0("PCA - Cell Level")
) + theme_bw(
) + theme(
axis.text.y = element_text(size=8),
axis.text.x = element_text(size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12)
) + xlim(-1*aLimit, aLimit) + ylim(-1*aLimit, aLimit)
h <- sum(c("clusterName", "sampleColor") %in% names(dfDat))
if (h ==2){
dfCol <- unique(dfDat[,c("clusterName", "clusterColor")])
colVec <- as.vector(dfCol$clusterColor)
names(colVec) <- as.vector(dfCol$clusterName)
plotListCell[[tag]] <- plotListCell[[tag]] + scale_colour_manual("clusterName" ,values = colVec
)
}
plotListCell[[tag]] <- plotListCell[[tag]] + guides(color=guide_legend(title="Clusters"), shape=guide_legend(title="Samples")
)
## Save to file ##
FNbase <- paste0("PCA.cell.level.", xVec[i],".", yVec[i], ".", VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotListCell[[tag]])
dev.off()
link <- paste0('<a href="https://',urlString,'/',Obio@parameterList$project_id,'/pca?x_axis=',gsub('_', '', xVec[i]),'&y_axis=',gsub('_', '', yVec[i]),'" target = "_blank">here</a>')
figCap <- paste0(
'**Figure, ' ,figureCount,'A:** Cell-level PCA plot for dimensions ', xVec[i], ' and ', yVec[i],'. ',
'Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. ',
'An interactive version of this figure can be found ', link, '. '
)
NewChnk <- paste0(
"#### PCA Cell Level ", xVec[i], " and ",yVec[i],
"\n```{r PCAcells_", i,
", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figCap,"'}\n",
"\n",
"\n print(plotListCell[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
## Done with cells ##
###########################################################################
###########################################################################
## Start with genes ##
dfPCADat <- data.frame(Loadings(OsC, reduction = "pca"))
dfPCADat[["gene"]] <- row.names(dfPCADat)
dfPCADat <- gather(
dfPCADat,
condition,
measurement, 1:(ncol(dfPCADat)-1),
factor_key=TRUE
)
Obio@dataTableList[["dfPCAloadings"]] <- dfPCADat
## Make Gene Level PCA ##
dfPCADat <- data.frame(Loadings(OsC, reduction = "pca"))
dfPCADat[["gene"]] <- row.names(dfPCADat)
dfPCADat <- gather(
dfPCADat,
condition,
measurement, 1:(ncol(dfPCADat)-1),
factor_key=TRUE
)
dfLoad <- dfPCADat
Obio@dataTableList$dfPCAloadings <- dfLoad
## Plot ##
selXY <- c(xVec[i], yVec[i])
dfSel <- filter(dfLoad, condition %in% selXY)
dfSel <- dfSel %>% spread(key=condition, value=measurement)
row.names(dfSel) <- dfSel$gene
dfSel[["highlight"]] <- ""
dfSel[["cat"]] <- ""
dfSel[["selX"]] <- ""
dfSel[["selY"]] <- ""
dfSel <- dfSel[order(dfSel[,selXY[1]], decreasing = FALSE), ]
dfSel[1:15, "highlight"] <- "+"
## Use two standard deviations for enrichment ##
twoSD <- 2*sd(dfSel[,selXY[1]])
twoSDxLine <- 2*sd(dfSel[,selXY[1]])
gSvec <- dfSel[dfSel[,selXY[1]] < -1* twoSD, "gene"]
if (length(gSvec) > 3){
EnrichedGenesList[[paste0(selXY[1], "_neg")]]<- as.vector(gSvec)
dfSel[dfSel$gene %in% gSvec, "cat"] <- "Selected"
dfSel[dfSel$gene %in% gSvec, "selX"] <- "+"
} else {
EnrichedGenesList[[paste0(selXY[1], "_neg")]]<- as.vector(dfSel$gene[1:15])
dfSel[dfSel$gene %in% as.vector(dfSel$gene[1:15]), "cat"] <- "Selected"
dfSel[dfSel$gene %in% as.vector(dfSel$gene[1:15]), "selX"] <- "+"
}
dfSel <- dfSel[order(dfSel[,selXY[1]], decreasing = TRUE), ]
dfSel[1:15, "highlight"] <- "+"
gSvec <- dfSel[dfSel[,selXY[1]] > twoSD, "gene"]
if (length(gSvec) > 3){
EnrichedGenesList[[paste0(selXY[1], "_pos")]]<- as.vector(gSvec)
dfSel[dfSel$gene %in% gSvec, "cat"] <- "Selected"
dfSel[dfSel$gene %in% gSvec, "selX"] <- "+"
} else {
EnrichedGenesList[[paste0(selXY[1], "_pos")]]<- as.vector(dfSel$gene[1:15])
dfSel[dfSel$gene %in% as.vector(dfSel$gene[1:15]), "cat"] <- "Selected"
dfSel[dfSel$gene %in% as.vector(dfSel$gene[1:15]), "selX"] <- "+"
}
## Now dim 2
dfSel <- dfSel[order(dfSel[,selXY[2]], decreasing = FALSE), ]
dfSel[1:15, "highlight"] <- "+"
twoSD <- 2*sd(dfSel[,selXY[2]])
twoSDyLine <- 2*sd(dfSel[,selXY[2]])
gSvec <- dfSel[dfSel[,selXY[2]] < -1* twoSD, "gene"]
if (length(gSvec) > 15){
EnrichedGenesList[[paste0(selXY[2], "_neg")]]<- as.vector(gSvec)
dfSel[dfSel$gene %in% gSvec, "cat"] <- "Selected"
dfSel[dfSel$gene %in% gSvec, "selY"] <- "+"
} else {
EnrichedGenesList[[paste0(selXY[2], "_neg")]]<- as.vector(dfSel$gene[1:15])
dfSel[dfSel$gene %in% as.vector(dfSel$gene[1:15]), "cat"] <- "Selected"
dfSel[dfSel$gene %in% as.vector(dfSel$gene[1:15]), "selY"] <- "+"
}
dfSel <- dfSel[order(dfSel[,selXY[2]], decreasing = TRUE), ]
dfSel[1:15, "highlight"] <- "+"
gSvec <- dfSel[dfSel[,selXY[2]] > twoSD, "gene"]
if (length(gSvec) > 3){
EnrichedGenesList[[paste0(selXY[2], "_pos")]]<- as.vector(gSvec)
dfSel[dfSel$gene %in% gSvec, "cat"] <- "Selected"
dfSel[dfSel$gene %in% gSvec, "selY"] <- "+"
} else {
EnrichedGenesList[[paste0(selXY[2], "_pos")]]<- as.vector(dfSel$gene[1:15])
dfSel[dfSel$gene %in% as.vector(dfSel$gene[1:15]), "cat"] <- "Selected"
dfSel[dfSel$gene %in% as.vector(dfSel$gene[1:15]), "selY"] <- "+"
}
## Done
colVec <- c("grey", "black")
names(colVec) <- c("", "Selected")
plotListGene[[tag]] <- ggplot(data=dfSel, aes_string(x=selXY[1],y=selXY[2], col="cat")
) + geom_vline(xintercept = 0, color = "grey", size=0.5
) + geom_hline(yintercept = 0, color = "grey", size=0.5
) + geom_vline(xintercept = c(twoSDxLine, -1* twoSDxLine), color = "red", lty=2,size=0.5
) + geom_hline(yintercept = c(twoSDyLine, -1* twoSDyLine), color = "red", lty=2,size=0.5
) + geom_hline(yintercept = 0, color = "grey", size=0.5
) + geom_point() + scale_color_manual(values=colVec
#) + ggtitle(paste0("PCA - Cell Level")
) + theme_bw(
) + theme(
axis.text.y = element_blank(), # element_text(size=8),
axis.text.x = element_blank(), #element_text(size=8),
axis.title.y = element_blank(), #element_text(size=8),
axis.title.x = element_blank(), #element_text(size=8),
axis.line = element_line(colour = "black"),
legend.position = "none",
panel.border = element_rect(colour = "black", fill=NA, size=1),
#plot.title = element_text(hjust = 0.5, size = 12)
) #+ guides(col = guide_legend(override.aes = list(shape = 16, size = legendDotSize)))
points <- as.vector(unique(dfSel[dfSel$highlight=="+", "gene"]))
plotListGene[[tag]] <- LabelPoints(plot = plotListGene[[tag]], points =points, repel = TRUE, xnudge = 0, ynudge = 0)
## Make historgrams
library(ggpubr)
library(gridExtra)
colVec <- c("grey", "black")
names(colVec) <- c("", "+")
hist_top <- ggplot(data=dfSel)+ geom_histogram(aes_string(x= selXY[1],fill="selX", color="selX"), binwidth=0.001) + theme(
axis.text.y = element_blank(),
axis.text.x = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
legend.position = "none",
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12)
) + geom_vline(xintercept = c(twoSDxLine, -1* twoSDxLine), color = "red", lty=2,size=0.5
) + scale_color_manual(values=colVec
) + scale_fill_manual(values=colVec
)
empty <- ggplot()+ geom_blank() + theme(panel.border = element_rect(colour = "white", fill=NA, size=1),)
#scatter <- plotListGene[[tag]]
hist_right <- ggplot(data=dfSel)+ geom_histogram(aes_string(x= selXY[2],fill="selY", color="selY"), binwidth=0.001) + theme(
axis.text.y = element_blank(),
axis.text.x = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
legend.position = "none",
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12)
) + geom_vline(xintercept = c(twoSDxLine, -1* twoSDxLine), color = "red", lty=2,size=0.5
) + scale_color_manual(values=colVec
) + scale_fill_manual(values=colVec
) + coord_flip()
## End histograms
## Arrange Figure ##
pdf(pdfTemp)
plotListGene[[tag]] <- grid.arrange(hist_top, empty, plotListGene[[tag]] , hist_right, ncol=2, nrow=2, widths=c(4, 1), heights=c(1, 4))
plotListGene[[tag]] <- annotate_figure(
plotListGene[[tag]],
top = text_grob("PCA Loadings Gene Level", face = "bold", size = 12),
bottom = text_grob(paste0("PCA Dim ",gsub("PC_", "", selXY[1])," Loadings"), size = 12),
left = text_grob(paste0("PCA Dim ",gsub("PC_", "", selXY[2])," Loadings"), rot = 90, size = 12) #,
#right = text_grob(bquote("Superscript: ("*kg~NH[3]~ha^-1~yr^-1*")"), rot = 90),
#fig.lab = "Figure 1", fig.lab.face = "bold"
)
dev.off()
unlink(pdfTemp)
## Done arranging figure ##
## Save to file ##
FNbase <- paste0("PCA.cell.level.", xVec[i],".", yVec[i], VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
plot(plotListGene[[tag]])
dev.off()
dim1 <- gsub("PC_", "", xVec[i])
dim2 <- gsub("PC_", "", yVec[i])
link <- paste0(
'<a href="https://',urlString,'/',
Obio@parameterList$project_id,
'/scatterplot?x_axis=add_counts_PCA_Dim_',
dim1,
'_Loadings&y_axis=add_counts_PCA_Dim_',
dim2,
'_Loadings&highlight_gene=&cat_id=ag_lab_categories__10',
'" target="_blank">here</a>'
)
figCap <- paste0(
"**Figure, " ,figureCount,"B:**Gene-level PCA plot for dimensions ", xVec[i], " and ", yVec[i], ". An interactive version of this figure can be found ", link, ". "
)
NewChnk <- paste0(
"\n```{r PCA_gene_level_", i,
", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figCap,"'}\n",
"\n",
"\n print(plotListGene[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
## Done with genes ##
###########################################################################
figureCount <- figureCount + 1
}
if (length(plotListGene) > 3){
tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
tabVar <- ".tabset .tabset-fade .tabset-pills"
}
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))
Figure, 49A: Cell-level PCA plot for dimensions PC_1 and PC_2. Download a pdf of this figure here. An interactive version of this figure can be found here.
Figure, 49B:Gene-level PCA plot for dimensions PC_1 and PC_2. An interactive version of this figure can be found here.
Figure, 50A: Cell-level PCA plot for dimensions PC_3 and PC_4. Download a pdf of this figure here. An interactive version of this figure can be found here.
Figure, 50B:Gene-level PCA plot for dimensions PC_3 and PC_4. An interactive version of this figure can be found here.
Figure, 51A: Cell-level PCA plot for dimensions PC_5 and PC_6. Download a pdf of this figure here. An interactive version of this figure can be found here.
Figure, 51B:Gene-level PCA plot for dimensions PC_5 and PC_6. An interactive version of this figure can be found here.
Figure, 52A: Cell-level PCA plot for dimensions PC_7 and PC_8. Download a pdf of this figure here. An interactive version of this figure can be found here.
Figure, 52B:Gene-level PCA plot for dimensions PC_7 and PC_8. An interactive version of this figure can be found here.
Figure, 53A: Cell-level PCA plot for dimensions PC_9 and PC_10. Download a pdf of this figure here. An interactive version of this figure can be found here.
Figure, 53B:Gene-level PCA plot for dimensions PC_9 and PC_10. An interactive version of this figure can be found here.
Figure, 54A: Cell-level PCA plot for dimensions PC_11 and PC_12. Download a pdf of this figure here. An interactive version of this figure can be found here.
Figure, 54B:Gene-level PCA plot for dimensions PC_11 and PC_12. An interactive version of this figure can be found here.
Figure, 55A: Cell-level PCA plot for dimensions PC_13 and PC_14. Download a pdf of this figure here. An interactive version of this figure can be found here.
Figure, 55B:Gene-level PCA plot for dimensions PC_13 and PC_14. An interactive version of this figure can be found here.
Figure, 56A: Cell-level PCA plot for dimensions PC_15 and PC_16. Download a pdf of this figure here. An interactive version of this figure can be found here.
Figure, 56B:Gene-level PCA plot for dimensions PC_15 and PC_16. An interactive version of this figure can be found here.
Figure, 57A: Cell-level PCA plot for dimensions PC_17 and PC_18. Download a pdf of this figure here. An interactive version of this figure can be found here.
Figure, 57B:Gene-level PCA plot for dimensions PC_17 and PC_18. An interactive version of this figure can be found here.
Figure, 58A: Cell-level PCA plot for dimensions PC_19 and PC_20. Download a pdf of this figure here. An interactive version of this figure can be found here.
Figure, 58B:Gene-level PCA plot for dimensions PC_19 and PC_20. An interactive version of this figure can be found here.
selVec <- c("clusterName", names(OsC@meta.data)[grep("^PC", names(OsC@meta.data))])
dfPCdat <- OsC@meta.data[, selVec]
ymin <- 1.1*min(dfPCdat$PC_1)
ymax <- 1.1*max(dfPCdat$PC_1)
h <- sum(c("clusterName", "clusterColor") %in% names(OsC@meta.data))
if (h ==2){
dfCol <- unique(OsC@meta.data[,c("clusterName", "clusterColor")])
clusterVec <- dfCol$clusterName
clusterColVec <- as.vector(dfCol$clusterColor)
names(clusterColVec) <- as.vector(dfCol$clusterName)
} else{
clusterVec <- sort(unique(OsC@meta.data[,"clusterName"]))
colVec <- hue_pal()(length(clusterVec))
names(colVec) <- clusterVec
}
chnkVec <- as.vector(NULL, mode="character")
plotList <- list()
for (i in 1:length(clusterVec)){
dfTemp <- dfPCdat[dfPCdat[,"clusterName"] == clusterVec[i],]
dfTemp[,"clusterName"] <- NULL
library(tidyr)
dfTemp <- gather(dfTemp, PC)
orderVec <- sort(as.numeric(gsub("PC_", "",unique(dfTemp$PC))))
orderVec <- paste0("PC_", orderVec)
dfTemp$PC <- factor(dfTemp$PC, levels = orderVec)
Ncolumns <- length(unique(dfTemp$PC))
a <- paste0("Cluster_", clusterVec[i])
tag <- paste0("PCA_Cell_Distributions_", a)
plotList[[tag]] <-ggplot(
dfTemp,
aes(x=PC, y=value, fill = PC)
) + geom_hline(yintercept = 0, color = "black", size=0.5
) + geom_jitter(width=0.1,alpha=0.2
) + geom_boxplot(
) + theme_bw(
) + theme(
legend.position = "none",
axis.text.y = element_text(size=8),
axis.text.x = element_text(size=8, angle = 45,vjust = 1, hjust=1),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12)
) + ggtitle(paste0("PCA Distribution: ", gsub("_", " ", a))
) + ylim(ymin, ymax) + scale_fill_manual(values=rep(as.vector(clusterColVec[i]), Ncolumns))
## Save to file ##
FNbase <- paste0(tag, VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
figCap <- paste0(
'**Figure, ' ,
figureCount,
':** This plot may help you to identify PCA dimensions, in which marker genes for cluster ',
clusterVec[i],
' become evident. Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. '
)
figureCount <- figureCount + 1
NewChnk <- paste0(
"#### ", tag,
"\n```{r ", tag,
", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figCap,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
}
if (length(plotList) > 3){
tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
tabVar <- ".tabset .tabset-fade .tabset-pills"
}
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))
Figure, 59: This plot may help you to identify PCA dimensions, in which marker genes for cluster C0 become evident. Download a pdf of this figure here.
Figure, 60: This plot may help you to identify PCA dimensions, in which marker genes for cluster C1 become evident. Download a pdf of this figure here.
Figure, 61: This plot may help you to identify PCA dimensions, in which marker genes for cluster C3 become evident. Download a pdf of this figure here.
Figure, 62: This plot may help you to identify PCA dimensions, in which marker genes for cluster C2 become evident. Download a pdf of this figure here.
Figure, 63: This plot may help you to identify PCA dimensions, in which marker genes for cluster C4 become evident. Download a pdf of this figure here.
library(knitr)
library(ggplot2)
#save.image("temp.RData")
library(clusterProfiler)
library(ggplot2)
gmtList <- list()
pos <- grep("clusterSigEnrichmentList", slotNames(Obio))
if (length(pos) > 0){
if (is.null(Obio@clusterSigEnrichmentList)){
dbtableList <- list(
"Cell Type Signatures" = "mysigdb_sc_sig",
"Cell Type Signatures" = "cibersort_L22",
"GO-MF" = "mysigdb_c5_MF",
"Pathways" = "mysigdb_c2_1329_canonical_pathways",
"Allen_Brain_Atlas" = "Allen_Brain_Atlas"
)
} else {
dbtableList <- Obio@clusterSigEnrichmentList
}
} else {
dbtableList <- list(
"Cell Type Signatures" = "mysigdb_sc_sig",
"Cell Type Signatures" = "cibersort_L22",
"GO-MF" = "mysigdb_c5_MF",
"Pathways" = "mysigdb_c2_1329_canonical_pathways",
"Allen_Brain_Atlas" = "Allen_Brain_Atlas"
)
}
for (i in 1:length(dbtableList)){
dfTemp <- unique(import.db.table.from.db(
host = Obio@dbDetailList$host,
dbname = Obio@dbDetailList$ref.cat.db,
dbtable = dbtableList [[i]],
password = db.pwd,
user = Obio@dbDetailList$db.user
))
rmVec <- grep("temp_", dfTemp$cat_type)
if (length(rmVec) > 0){
dfTemp <- dfTemp[-rmVec, ]
}
dfTemp <- unique(dbcat2gmt(
dfTemp, # As downloaded from reference_categories_db_new database
gene.id.column = queryGS
))
dfTemp <- dfTemp[!duplicated(dfTemp[,1]), ]
write.table(
dfTemp,
"temp.gmt.txt",
row.names = F,
sep = "\t",
col.names = F,
quote = F
)
CPgmt <- read.gmt("temp.gmt.txt")
unlink("temp.gmt.txt")
CPgmt <- unique(CPgmt[CPgmt$gene != "", ])
gmtList[[dbtableList[[i]]]] <- CPgmt
}
## Edit collection names for plot
names(gmtList) <- gsub("mysigdb_", "",names(gmtList))
names(gmtList) <- gsub("c2_1329_canonical_p", "P",names(gmtList))
names(gmtList) <- gsub("sc_sig", "CellSig",names(gmtList))
names(gmtList) <- gsub("cibersort_L22", "CellSig",names(gmtList))
names(gmtList) <- gsub("c5_", "GO_",names(gmtList))
## Done creating gmt list
###########################
## Select colors ##
library(scales)
enrCols <- hue_pal()(length(gmtList))
names(enrCols) <- names(gmtList)
PCAdimensions <- paste0("PC_", 1:20)
plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")
for (j in 1:length(PCAdimensions)){
posTestGeneSet <- as.vector(
unique(
EnrichedGenesList[[paste0(PCAdimensions[j], "_pos")]]
)
)
negTestGeneSet <- as.vector(
unique(
EnrichedGenesList[[paste0(PCAdimensions[j], "_neg")]]
)
)
###########################################################################
## Create GMT file for category enrichment ##
###########################
## Create gmt list
## Retrieve gmt files from database
## Add custom gmt files
## Done ##
###########################################################################
library(clusterProfiler)
library(ggplot2)
library(tidyr)
if (Obio@parameterList$geneIDcolumn != "mgi_symbol" & Obio@parameterList$geneIDcolumn != "hgnc_symbol") {
queryGS <- "hgnc_symbol"
} else {
queryGS <- Obio@parameterList$geneIDcolumn
}
if (Obio@parameterList$host == "10.27.241.234"){
urlString <- "biologic.thecrick.org"
} else {
urlString <- "biologic.crick.ac.uk"
}
pvalueCutoff <- 0.5
topMaxCat <- 10
## Get background gene set ##
#backgroundGeneVec <- row.names(OsC[["RNA"]]@counts)
if ((length(posTestGeneSet) >= 3) | (length(negTestGeneSet) >= 3)){
## Do enrichment ##
first <- TRUE
if (length(posTestGeneSet) >= 3){
for (k in 1:length(gmtList)){
egmt <- data.frame(
enricher(
negTestGeneSet,
TERM2GENE=gmtList[[k]],
pvalueCutoff = pvalueCutoff
)
)
if (!is.null(egmt)){
if (nrow(egmt) > 0){
egmt[["Collection"]] <- substr(names(gmtList)[k], 1,10)
}
if (first){
dfTempEnriched <- egmt
first <- FALSE
} else {
dfTempEnriched <- rbind(
dfTempEnriched,
egmt
)
}
}
}
if (nrow(dfTempEnriched) > 0){
dfTempEnriched[["direction"]] <- "positive"
dfTempEnriched[["log10FDR"]] <- -1*log10(dfTempEnriched$p.adjust)
dfTempEnriched <- dfTempEnriched[order(dfTempEnriched$log10FDR, decreasing = T),]
dfTempEnriched <- na.omit(dfTempEnriched)
if (nrow(dfTempEnriched) > topMaxCat){
dfTempEnriched <- dfTempEnriched[1:topMaxCat, ]
}
}
} # end positive
## Now the negative side ##
if (length(negTestGeneSet) >= 3){
first <- TRUE
for (k in 1:length(gmtList)){
egmt <- data.frame(
enricher(
posTestGeneSet,
TERM2GENE=gmtList[[k]],
pvalueCutoff = pvalueCutoff
)
)
if (!is.null(egmt)){
if (nrow(egmt) > 0){
egmt[["Collection"]] <- substr(names(gmtList)[k], 1,10)
}
if (first){
dfTempEnrichedNeg <- egmt
first <- FALSE
} else {
dfTempEnrichedNeg <- rbind(
dfTempEnrichedNeg,
egmt
)
}
}
}
if (nrow(dfTempEnrichedNeg) > 0){
dfTempEnrichedNeg[["direction"]] <- "negative"
dfTempEnrichedNeg[["log10FDR"]] <- log10(dfTempEnrichedNeg$p.adjust)
dfTempEnrichedNeg <- dfTempEnrichedNeg[order(dfTempEnrichedNeg$log10FDR, decreasing = F),]
dfTempEnrichedNeg <- na.omit(dfTempEnrichedNeg)
if (nrow(dfTempEnrichedNeg) > topMaxCat){
dfTempEnrichedNeg <- dfTempEnrichedNeg[1:topMaxCat, ]
}
}
} # end negative
## Make plot
if ((nrow(dfTempEnriched) > 0) | (nrow(dfTempEnrichedNeg) > 0)){
dfSel <- rbind(
dfTempEnriched,
dfTempEnrichedNeg
)
dfSel <- na.omit(dfSel)
dfSel <- dfSel[order(dfSel$log10FDR),]
dfSel$log10FDR <- round(dfSel$log10FDR, 2)
dfSel[["Category"]] <- ""
dfSel[dfSel$log10FDR >= 0, "Category"] <- "Enr."
dfSel[dfSel$log10FDR < 0, "Category"] <- "Depl."
for (l in 1:nrow(dfSel)){
if (nchar(dfSel[l, "ID"]) > 30){
part1 <- substr(dfSel[l, "ID"], 1, 30)
part2 <- substr(dfSel[l, "ID"], 31, 60)
dfSel[l, "ID"] <- paste0(part1, " \\n", part2)
}
}
#dfSel$Term <- gsub("\\(GO", "\\\n\\(GO", dfSel$Term)
dfSel$ID <- factor(dfSel$ID, levels = unique(dfSel$ID))
plotList[[paste0("PCA_ENR_", j)]] <- ggplot(
data=dfSel, aes(x= ID, y=log10FDR, fill=Collection, order=log10FDR)
) + geom_bar(stat="identity", colour="black"
) + coord_flip() +scale_fill_manual(values=enrCols
) + theme_bw(
) + theme(
axis.text.y = element_text(size=8),
axis.text.x = element_text(size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12)
) + labs(title = paste0("Cluster ", PCAdimensions[j]," enriched genes") ,y = "-log10(FDR)", x = ""
) + geom_hline(yintercept = c(-log10(0.05), log10(0.05)), color = "grey", size=0.5, lty=2
) + geom_hline(yintercept = 0, color = "black", size=0.5
)
cat(" \n")
## Save to file ##
FNbase <- paste0("PCA_Cluster_", PCAdimensions[j],".enriched.genes", VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[paste0("PCA_ENR_", j)]])
dev.off()
link <- paste0(
'<a href="https://', urlString, '/',
Obio@parameterList$project_id,
'/category-view?category_type=GO-BP" target="_blank">CategoryView</a>'
)
## Create R markdown chunk ##
figLegend <- paste0(
'**Figure ',
figureCount,
'**: GO-BP category enrichment analysis for the top genes that have <font color = "',colVec[2],'"> the most positive </font> and <font color = "',colVec[1],'">the most negative</font> PCA loading values in dimension ',
PCAdimensions[j],
' associated with them. Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. To view these gene sets in the context of your data, go to ',link,' and find these categories using the search box.'
)
figureCount <- figureCount + 1
NewChnk <- paste0(
"#### ", PCAdimensions[j],
"\n```{r enrichr_",
j,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",paste0("PCA_ENR_", j),"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
}
## done with plot
} ## Done with per dimension loops
}
if (length(plotList) > 3){
tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
tabVar <- ".tabset .tabset-fade .tabset-pills"
}
###############################################################################
## Do category enrichment on clusters ##
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))
Figure 64: GO-BP category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension PC_1 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.
Figure 65: GO-BP category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension PC_2 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.
Figure 66: GO-BP category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension PC_3 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.
Figure 67: GO-BP category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension PC_4 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.
Figure 68: GO-BP category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension PC_5 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.
Figure 69: GO-BP category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension PC_6 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.
Figure 70: GO-BP category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension PC_7 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.
Figure 71: GO-BP category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension PC_8 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.
Figure 72: GO-BP category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension PC_9 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.
Figure 73: GO-BP category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension PC_10 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.
Figure 74: GO-BP category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension PC_11 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.
Figure 75: GO-BP category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension PC_12 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.
Figure 76: GO-BP category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension PC_13 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.
Figure 77: GO-BP category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension PC_14 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.
Figure 78: GO-BP category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension PC_15 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.
Figure 79: GO-BP category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension PC_16 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.
Figure 80: GO-BP category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension PC_17 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.
Figure 81: GO-BP category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension PC_18 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.
Figure 82: GO-BP category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension PC_19 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.
Figure 83: GO-BP category enrichment analysis for the top genes that have the most positive and the most negative PCA loading values in dimension PC_20 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView and find these categories using the search box.
## Done doing enrichment on clusters ##
###############################################################################
## Add default ##
pos <- grep("addDmaps", names(Obio@parameterList))
if (length(pos) == 0){
Obio@parameterList[["addDmaps"]] <- FALSE
}
if (Obio@parameterList$addDmaps){
library(knitr)
library(ggplot2)
invertPT <- TRUE
plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")
library(Seurat)
library(destiny)
dfPCA <- OsC@meta.data
dfPCA <- dfPCA[,grep("PC_", names(dfPCA))]
dmPCA <- DiffusionMap(dfPCA)
#dpt <- DPT(dm, tips = 268)
#dpt <- DPT(dm)
dpt <- DPT(dmPCA)
#pseudotime <- dpt$dpt
# Plot DC1 vs DC2 and color the cells by their inferred diffusion pseudotime.
# We can accesss diffusion pseudotime via dpt$dpt.
df <- data.frame(
DC1 = eigenvectors(dmPCA)[, 1],
DC2 = eigenvectors(dmPCA)[, 2],
DC3 = eigenvectors(dmPCA)[, 3],
"DM_Pseudotime" = dpt$dpt
)
df$cellID <- row.names(dfPCA)
## For this project reverse pseudotime ##
if (invertPT){
PTmax <- max(df$DM_Pseudotime)
df$DM_Pseudotime <- -1* (df$DM_Pseudotime - PTmax)
## Invert DC1, 2, 3
df$DC1 <- -1* df$DC1
df$DC2 <- -1* df$DC2
df$DC3 <- -1* df$DC3
}
## Add to table ##
df$cellID <- row.names(dfPCA)
## add here ##
OsC <- addDf2seuratMetaData(
obj = OsC,
dfAdd = df
)
## Create Pseudotime plot ##
dfTemp <- OsC@meta.data
#dotsize <- 0.5
dotcolor <- "darkblue"
tag <- "PC1PC2all"
plotList[[tag]] <- ggplot(dfTemp, aes(PC_1, PC_2, color=DM_Pseudotime)
)+ geom_point(
shape = 16,
size = as.numeric(dotsize)
) + xlab("PC1") + ylab("PC2") + scale_color_gradient(
"Pseudotime",
low="#ff6600",
high=dotcolor #,
#limits=c(0,maxExpr)
) + theme_bw(
) + theme(
axis.text.y = element_text(size=8),
axis.text.x = element_text(size=8),
axis.title.y = element_text(size=8),
axis.title.x = element_text(size=8),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
plot.title = element_text(hjust = 0.5, size = 12)
) + ggtitle("PC1, PC2 and DM Pseudotime"
) #+ xlim(minX, maxX) + ylim(minY, maxY)
## Save to file ##
FNbase <- paste0("Pseudotime_overview", VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
## Create R markdown chunk ##
figLegend <- paste0(
'**Figure ',
figureCount,
'**: Figure depicting PCA components 1 and 2 with the diffusion map pseudotime highlighted in color. Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>. '
)
figureCount <- figureCount + 1
NewChnk <- paste0(
"\n#### Pseudotime All Timepoints",
"\n```{r ", tag, ", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
} # end dmap
if (length(plotList) > 3){
tabVar <- ".tabset .tabset-fade .tabset-dropdown"
} else {
tabVar <- ".tabset .tabset-fade .tabset-pills"
}
###############################################################################
## Do category enrichment on clusters ##
if (Obio@parameterList$addDmaps){
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))
}
Figure 84: Figure depicting PCA components 1 and 2 with the diffusion map pseudotime highlighted in color. Download a pdf of this figure here.
## Done doing enrichment on clusters ##
###############################################################################
### Will save Obio object here, so it can be re-used with different parameters
save(Obio,
file = paste0(
Obio@parameterList$localWorkDir,
Obio@parameterList$project_id,
".bioLOGIC.Robj"
)
)
print("Obio Object saved.")
save(OsC,
file = paste0(
Obio@parameterList$localWorkDir,
Obio@parameterList$project_id,
".Seurat.Robj"
)
)
## Try to retrieve project data from db ##
library(RMySQL)
db.pwd2 <- "_asf_"
db.user2 <- "asf"
host2 <- "ms1.thecrick.org"
projectParams <- Obio@documentationParams
tryCatch({
dbDB = dbConnect(drv = RMySQL::MySQL(), user = db.user2, password = db.pwd2, host = host2, dbname = "asf");
dfProposal = dbGetQuery(dbDB, paste0("SELECT * FROM asf_proposals WHERE project_name ='",Obio@parameterList$lims.id,"'"));
dbDisconnect(dbDB)
}, error = function(x) {
message("Project Database could not be reached or has no entry in Obio@parameterList$lims.id for this analysis.")
})
[1] TRUE
if (exists("dfProposal")){
if (nrow(dfProposal) == 1){
if (!is.na(dfProposal[1,"ProjectAlias"]) & dfProposal[1,"ProjectAlias"] != ""){
projectParams[["title"]] = paste0(dfProposal[1,"ProjectAlias"], " - ", dfProposal[1,"project_name"])
}
if (!is.na(dfProposal[1,"project_user"]) & dfProposal[1,"project_user"] != ""){
projectParams[["subtitle"]] = paste0(dfProposal[1,"user_lab"], " Lab - ", dfProposal[1,"project_user"])
projectParams[["subtitle"]] <- gsub("^ Lab - ", "", projectParams[["subtitle"]])
}
if (!is.na(dfProposal[1,"proposal_text"]) & dfProposal[1,"proposal_text"] != ""){
projectParams[["abstract"]] = dfProposal[1,"proposal_text"]
}
}
}
## Escape all special characters
projectParams <- lapply(
projectParams, function(x)
#gsub("([.|()\\^{}+$*?]|\\[|\\])", "\\\\\1", x)
gsub("([.|()/\\^{}+$*?]|\\[|\\])", " ", x)
)
projectParams <- lapply(
projectParams, function(x)
#gsub("([.|()\\^{}+$*?]|\\[|\\])", "\\\\\1", x)
gsub("\\\n", " ", x)
)
#projectParams$title <- "Title"
# projectParams$abstract <- "This is the QC section."
#projectParams$subtitle <- "Abstract"
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS: /camp/apps/misc/stp/babs/manual/software/r/R-3.6.0-foss-2016b/lib64/R/lib/libRblas.so
## LAPACK: /camp/apps/misc/stp/babs/manual/software/r/R-3.6.0-foss-2016b/lib64/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_GB.utf-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.utf-8 LC_COLLATE=en_GB.utf-8
## [5] LC_MONETARY=en_GB.utf-8 LC_MESSAGES=en_GB.utf-8
## [7] LC_PAPER=en_GB.utf-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.utf-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] destiny_2.14.0 gridExtra_2.3
## [3] ggpubr_0.4.0 GSEABase_1.46.0
## [5] graph_1.62.0 annotate_1.62.0
## [7] XML_3.99-0.3 AnnotationDbi_1.46.1
## [9] clusterProfiler_3.12.0 openxlsx_4.2.3
## [11] DT_0.17 AUCell_1.6.1
## [13] sleepwalk_0.3.1 ggtree_1.16.6
## [15] scales_1.1.1 ggrepel_0.8.2
## [17] RMySQL_0.10.20 DBI_1.1.1
## [19] DESeq2_1.24.0 SummarizedExperiment_1.14.1
## [21] DelayedArray_0.10.0 BiocParallel_1.18.1
## [23] matrixStats_0.58.0 Biobase_2.44.0
## [25] GenomicRanges_1.36.1 GenomeInfoDb_1.20.0
## [27] IRanges_2.18.3 S4Vectors_0.22.1
## [29] BiocGenerics_0.30.0 knitr_1.31
## [31] Seurat_3.1.5 forcats_0.5.1
## [33] stringr_1.4.0 dplyr_1.0.5
## [35] purrr_0.3.4 readr_1.4.0
## [37] tidyr_1.1.3 tibble_3.1.0
## [39] ggplot2_3.3.3 tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] ggthemes_4.2.4 R.methodsS3_1.8.1 jrc_0.4.0
## [4] bit64_4.0.5 irlba_2.3.3 R.utils_2.10.1
## [7] data.table_1.14.0 rpart_4.1-15 RCurl_1.98-1.2
## [10] generics_0.1.0 cowplot_1.1.1 RSQLite_2.2.3
## [13] europepmc_0.4 RANN_2.6.1 proxy_0.4-25
## [16] future_1.21.0 bit_4.0.4 enrichplot_1.4.0
## [19] xml2_1.3.2 lubridate_1.7.10 httpuv_1.5.5
## [22] assertthat_0.2.1 viridis_0.5.1 xfun_0.21
## [25] hms_1.0.0 jquerylib_0.1.3 evaluate_0.14
## [28] promises_1.2.0.1 DEoptimR_1.0-8 progress_1.2.2
## [31] fansi_0.4.2 dbplyr_2.1.0 readxl_1.3.1
## [34] igraph_1.2.6 geneplotter_1.62.0 htmlwidgets_1.5.3
## [37] ellipsis_0.3.1 RSpectra_0.16-0 crosstalk_1.1.1
## [40] backports_1.2.1 vctrs_0.3.6 TTR_0.24.2
## [43] ROCR_1.0-11 abind_1.4-5 RcppEigen_0.3.3.9.1
## [46] cachem_1.0.4 withr_2.4.1 ggforce_0.3.2
## [49] triebeard_0.3.0 robustbase_0.93-7 vcd_1.4-8
## [52] checkmate_2.0.0 sctransform_0.3.2 treeio_1.8.2
## [55] xts_0.12.1 prettyunits_1.1.1 cluster_2.0.8
## [58] DOSE_3.10.2 ape_5.4-1 segmented_1.3-3
## [61] lazyeval_0.2.2 laeken_0.5.1 crayon_1.4.1
## [64] genefilter_1.66.0 pkgconfig_2.0.3 labeling_0.4.2
## [67] tweenr_1.0.1 nlme_3.1-139 nnet_7.3-12
## [70] rlang_0.4.10 globals_0.14.0 lifecycle_1.0.0
## [73] modelr_0.1.8 rsvd_1.0.3 cellranger_1.1.0
## [76] polyclip_1.10-0 lmtest_0.9-38 urltools_1.7.3
## [79] Matrix_1.3-2 carData_3.0-4 boot_1.3-22
## [82] zoo_1.8-8 reprex_1.0.0 base64enc_0.1-3
## [85] ggridges_0.5.3 png_0.1-7 viridisLite_0.3.0
## [88] bitops_1.0-6 R.oo_1.24.0 KernSmooth_2.23-15
## [91] blob_1.2.1 qvalue_2.16.0 parallelly_1.23.0
## [94] rstatix_0.7.0 gridGraphics_0.5-1 jpeg_0.1-8.1
## [97] ggsignif_0.6.1 memoise_2.0.0 magrittr_2.0.1
## [100] plyr_1.8.6 ica_1.0-2 zlibbioc_1.30.0
## [103] compiler_3.6.0 RColorBrewer_1.1-2 fitdistrplus_1.1-3
## [106] cli_2.3.1 XVector_0.24.0 listenv_0.8.0
## [109] patchwork_1.1.1 pbapply_1.4-3 ps_1.6.0
## [112] htmlTable_2.1.0 Formula_1.2-4 MASS_7.3-51.4
## [115] mgcv_1.8-28 tidyselect_1.1.0 stringi_1.5.3
## [118] highr_0.8 yaml_2.2.1 GOSemSim_2.10.0
## [121] locfit_1.5-9.4 latticeExtra_0.6-29 grid_3.6.0
## [124] sass_0.3.1 fastmatch_1.1-0 tools_3.6.0
## [127] rio_0.5.26 future.apply_1.7.0 rstudioapi_0.13
## [130] foreign_0.8-71 smoother_1.1 scatterplot3d_0.3-41
## [133] farver_2.1.0 Rtsne_0.15 ggraph_2.0.5
## [136] digest_0.6.27 rvcheck_0.1.8 BiocManager_1.30.10
## [139] shiny_1.6.0 Rcpp_1.0.6 car_3.0-10
## [142] broom_0.7.5 later_1.1.0.1 RcppAnnoy_0.0.18
## [145] httr_1.4.2 kernlab_0.9-29 colorspace_2.0-0
## [148] ranger_0.12.1 rvest_0.3.6 fs_1.5.0
## [151] reticulate_1.18 splines_3.6.0 uwot_0.1.10
## [154] tidytree_0.3.3 graphlayouts_0.7.1 sp_1.4-5
## [157] ggplotify_0.0.5 plotly_4.9.3 xtable_1.8-4
## [160] jsonlite_1.7.2 tidygraph_1.2.0 UpSetR_1.4.0
## [163] R6_2.5.0 Hmisc_4.5-0 pillar_1.5.1
## [166] htmltools_0.5.1.1 mime_0.10 VIM_6.1.0
## [169] glue_1.4.2 fastmap_1.1.0 class_7.3-15
## [172] codetools_0.2-16 fgsea_1.15.0 tsne_0.1-3
## [175] utf8_1.1.4 lattice_0.20-38 bslib_0.2.4
## [178] mixtools_1.2.0 curl_4.3 leiden_0.3.7
## [181] zip_2.1.1 GO.db_3.8.2 survival_3.2-7
## [184] rmarkdown_2.7 munsell_0.5.0 e1071_1.7-2
## [187] DO.db_2.9 GenomeInfoDbData_1.2.1 haven_2.3.1
## [190] reshape2_1.4.4 gtable_0.3.0
The Francis Crick Institute, stefan.boeing@crick.ac.uk↩